{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 6.420545746388443,
"eval_steps": 500,
"global_step": 8000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04012841091492777,
"grad_norm": 86.5052261352539,
"learning_rate": 5.750000000000001e-07,
"loss": 2.891,
"step": 50
},
{
"epoch": 0.08025682182985554,
"grad_norm": 44.9648551940918,
"learning_rate": 1.2000000000000002e-06,
"loss": 2.0127,
"step": 100
},
{
"epoch": 0.12038523274478331,
"grad_norm": 32.67415237426758,
"learning_rate": 1.825e-06,
"loss": 1.3251,
"step": 150
},
{
"epoch": 0.16051364365971107,
"grad_norm": 33.664764404296875,
"learning_rate": 2.4500000000000003e-06,
"loss": 1.139,
"step": 200
},
{
"epoch": 0.20064205457463885,
"grad_norm": 38.86428451538086,
"learning_rate": 3.075e-06,
"loss": 0.9913,
"step": 250
},
{
"epoch": 0.24077046548956663,
"grad_norm": 38.11140441894531,
"learning_rate": 3.7e-06,
"loss": 0.9344,
"step": 300
},
{
"epoch": 0.2808988764044944,
"grad_norm": 39.1014518737793,
"learning_rate": 4.325e-06,
"loss": 0.8068,
"step": 350
},
{
"epoch": 0.32102728731942215,
"grad_norm": 19.286678314208984,
"learning_rate": 4.95e-06,
"loss": 0.7104,
"step": 400
},
{
"epoch": 0.3611556982343499,
"grad_norm": 24.21994972229004,
"learning_rate": 5.575000000000001e-06,
"loss": 0.6199,
"step": 450
},
{
"epoch": 0.4012841091492777,
"grad_norm": 23.403148651123047,
"learning_rate": 6.200000000000001e-06,
"loss": 0.6139,
"step": 500
},
{
"epoch": 0.44141252006420545,
"grad_norm": 21.166723251342773,
"learning_rate": 6.825000000000001e-06,
"loss": 0.5843,
"step": 550
},
{
"epoch": 0.48154093097913325,
"grad_norm": 18.9536075592041,
"learning_rate": 7.450000000000001e-06,
"loss": 0.578,
"step": 600
},
{
"epoch": 0.521669341894061,
"grad_norm": 21.438701629638672,
"learning_rate": 8.075000000000001e-06,
"loss": 0.5505,
"step": 650
},
{
"epoch": 0.5617977528089888,
"grad_norm": 25.26392364501953,
"learning_rate": 8.700000000000001e-06,
"loss": 0.5649,
"step": 700
},
{
"epoch": 0.6019261637239165,
"grad_norm": 19.77715492248535,
"learning_rate": 9.325000000000001e-06,
"loss": 0.5839,
"step": 750
},
{
"epoch": 0.6420545746388443,
"grad_norm": 20.142839431762695,
"learning_rate": 9.950000000000001e-06,
"loss": 0.5518,
"step": 800
},
{
"epoch": 0.6821829855537721,
"grad_norm": 20.156387329101562,
"learning_rate": 9.936111111111112e-06,
"loss": 0.5514,
"step": 850
},
{
"epoch": 0.7223113964686998,
"grad_norm": 21.246883392333984,
"learning_rate": 9.866666666666668e-06,
"loss": 0.5125,
"step": 900
},
{
"epoch": 0.7624398073836276,
"grad_norm": 17.57798957824707,
"learning_rate": 9.797222222222223e-06,
"loss": 0.5129,
"step": 950
},
{
"epoch": 0.8025682182985554,
"grad_norm": 18.979021072387695,
"learning_rate": 9.727777777777777e-06,
"loss": 0.493,
"step": 1000
},
{
"epoch": 0.8426966292134831,
"grad_norm": 20.188549041748047,
"learning_rate": 9.658333333333334e-06,
"loss": 0.5158,
"step": 1050
},
{
"epoch": 0.8828250401284109,
"grad_norm": 17.60784149169922,
"learning_rate": 9.58888888888889e-06,
"loss": 0.4901,
"step": 1100
},
{
"epoch": 0.9229534510433387,
"grad_norm": 38.89186477661133,
"learning_rate": 9.519444444444446e-06,
"loss": 0.493,
"step": 1150
},
{
"epoch": 0.9630818619582665,
"grad_norm": 20.224136352539062,
"learning_rate": 9.450000000000001e-06,
"loss": 0.4911,
"step": 1200
},
{
"epoch": 1.0032102728731942,
"grad_norm": 13.886350631713867,
"learning_rate": 9.380555555555556e-06,
"loss": 0.4649,
"step": 1250
},
{
"epoch": 1.043338683788122,
"grad_norm": 14.338887214660645,
"learning_rate": 9.311111111111112e-06,
"loss": 0.3867,
"step": 1300
},
{
"epoch": 1.0834670947030498,
"grad_norm": 17.406949996948242,
"learning_rate": 9.241666666666668e-06,
"loss": 0.3609,
"step": 1350
},
{
"epoch": 1.1235955056179776,
"grad_norm": 19.521095275878906,
"learning_rate": 9.172222222222223e-06,
"loss": 0.354,
"step": 1400
},
{
"epoch": 1.1637239165329052,
"grad_norm": 14.558744430541992,
"learning_rate": 9.102777777777777e-06,
"loss": 0.357,
"step": 1450
},
{
"epoch": 1.203852327447833,
"grad_norm": 13.476064682006836,
"learning_rate": 9.033333333333334e-06,
"loss": 0.3593,
"step": 1500
},
{
"epoch": 1.2439807383627608,
"grad_norm": 15.735109329223633,
"learning_rate": 8.96388888888889e-06,
"loss": 0.3567,
"step": 1550
},
{
"epoch": 1.2841091492776886,
"grad_norm": 17.4168758392334,
"learning_rate": 8.894444444444445e-06,
"loss": 0.3534,
"step": 1600
},
{
"epoch": 1.3242375601926164,
"grad_norm": 17.488101959228516,
"learning_rate": 8.825000000000001e-06,
"loss": 0.3481,
"step": 1650
},
{
"epoch": 1.3643659711075442,
"grad_norm": 14.388931274414062,
"learning_rate": 8.755555555555556e-06,
"loss": 0.3517,
"step": 1700
},
{
"epoch": 1.404494382022472,
"grad_norm": 14.942325592041016,
"learning_rate": 8.686111111111112e-06,
"loss": 0.3361,
"step": 1750
},
{
"epoch": 1.4446227929373996,
"grad_norm": 19.468101501464844,
"learning_rate": 8.616666666666668e-06,
"loss": 0.349,
"step": 1800
},
{
"epoch": 1.4847512038523274,
"grad_norm": 17.254077911376953,
"learning_rate": 8.547222222222223e-06,
"loss": 0.3593,
"step": 1850
},
{
"epoch": 1.5248796147672552,
"grad_norm": 14.908585548400879,
"learning_rate": 8.477777777777778e-06,
"loss": 0.3464,
"step": 1900
},
{
"epoch": 1.565008025682183,
"grad_norm": 14.53330135345459,
"learning_rate": 8.408333333333334e-06,
"loss": 0.3431,
"step": 1950
},
{
"epoch": 1.6051364365971108,
"grad_norm": 16.17108154296875,
"learning_rate": 8.33888888888889e-06,
"loss": 0.3272,
"step": 2000
},
{
"epoch": 1.6452648475120384,
"grad_norm": 17.363994598388672,
"learning_rate": 8.269444444444445e-06,
"loss": 0.354,
"step": 2050
},
{
"epoch": 1.6853932584269664,
"grad_norm": 15.006171226501465,
"learning_rate": 8.2e-06,
"loss": 0.3399,
"step": 2100
},
{
"epoch": 1.725521669341894,
"grad_norm": 19.402164459228516,
"learning_rate": 8.130555555555556e-06,
"loss": 0.3507,
"step": 2150
},
{
"epoch": 1.7656500802568218,
"grad_norm": 18.973295211791992,
"learning_rate": 8.061111111111112e-06,
"loss": 0.3384,
"step": 2200
},
{
"epoch": 1.8057784911717496,
"grad_norm": 16.426692962646484,
"learning_rate": 7.991666666666668e-06,
"loss": 0.3475,
"step": 2250
},
{
"epoch": 1.8459069020866774,
"grad_norm": 15.58637523651123,
"learning_rate": 7.922222222222223e-06,
"loss": 0.3392,
"step": 2300
},
{
"epoch": 1.8860353130016052,
"grad_norm": 22.870681762695312,
"learning_rate": 7.852777777777778e-06,
"loss": 0.3379,
"step": 2350
},
{
"epoch": 1.9261637239165328,
"grad_norm": 13.37627124786377,
"learning_rate": 7.783333333333334e-06,
"loss": 0.3397,
"step": 2400
},
{
"epoch": 1.9662921348314608,
"grad_norm": 13.650712013244629,
"learning_rate": 7.71388888888889e-06,
"loss": 0.3325,
"step": 2450
},
{
"epoch": 2.0064205457463884,
"grad_norm": 11.761895179748535,
"learning_rate": 7.644444444444445e-06,
"loss": 0.3198,
"step": 2500
},
{
"epoch": 2.0465489566613164,
"grad_norm": 10.545045852661133,
"learning_rate": 7.575e-06,
"loss": 0.2247,
"step": 2550
},
{
"epoch": 2.086677367576244,
"grad_norm": 14.525964736938477,
"learning_rate": 7.505555555555556e-06,
"loss": 0.2106,
"step": 2600
},
{
"epoch": 2.1268057784911716,
"grad_norm": 12.788507461547852,
"learning_rate": 7.436111111111112e-06,
"loss": 0.2179,
"step": 2650
},
{
"epoch": 2.1669341894060996,
"grad_norm": 15.106952667236328,
"learning_rate": 7.3666666666666676e-06,
"loss": 0.23,
"step": 2700
},
{
"epoch": 2.207062600321027,
"grad_norm": 14.04389762878418,
"learning_rate": 7.297222222222223e-06,
"loss": 0.2156,
"step": 2750
},
{
"epoch": 2.247191011235955,
"grad_norm": 17.352598190307617,
"learning_rate": 7.227777777777778e-06,
"loss": 0.2175,
"step": 2800
},
{
"epoch": 2.287319422150883,
"grad_norm": 11.569601058959961,
"learning_rate": 7.158333333333334e-06,
"loss": 0.2079,
"step": 2850
},
{
"epoch": 2.3274478330658104,
"grad_norm": 14.02609634399414,
"learning_rate": 7.0888888888888894e-06,
"loss": 0.2307,
"step": 2900
},
{
"epoch": 2.3675762439807384,
"grad_norm": 15.397391319274902,
"learning_rate": 7.019444444444446e-06,
"loss": 0.211,
"step": 2950
},
{
"epoch": 2.407704654895666,
"grad_norm": 13.850847244262695,
"learning_rate": 6.95e-06,
"loss": 0.2115,
"step": 3000
},
{
"epoch": 2.447833065810594,
"grad_norm": 10.497112274169922,
"learning_rate": 6.880555555555556e-06,
"loss": 0.2071,
"step": 3050
},
{
"epoch": 2.4879614767255216,
"grad_norm": 13.437689781188965,
"learning_rate": 6.811111111111111e-06,
"loss": 0.2085,
"step": 3100
},
{
"epoch": 2.5280898876404496,
"grad_norm": 17.070728302001953,
"learning_rate": 6.741666666666668e-06,
"loss": 0.2022,
"step": 3150
},
{
"epoch": 2.568218298555377,
"grad_norm": 14.102376937866211,
"learning_rate": 6.672222222222223e-06,
"loss": 0.2014,
"step": 3200
},
{
"epoch": 2.608346709470305,
"grad_norm": 12.899041175842285,
"learning_rate": 6.602777777777778e-06,
"loss": 0.2172,
"step": 3250
},
{
"epoch": 2.648475120385233,
"grad_norm": 15.379789352416992,
"learning_rate": 6.533333333333334e-06,
"loss": 0.2096,
"step": 3300
},
{
"epoch": 2.6886035313001604,
"grad_norm": 12.808381080627441,
"learning_rate": 6.4638888888888895e-06,
"loss": 0.2154,
"step": 3350
},
{
"epoch": 2.7287319422150884,
"grad_norm": 13.826408386230469,
"learning_rate": 6.394444444444445e-06,
"loss": 0.2176,
"step": 3400
},
{
"epoch": 2.768860353130016,
"grad_norm": 14.112689971923828,
"learning_rate": 6.3250000000000004e-06,
"loss": 0.2079,
"step": 3450
},
{
"epoch": 2.808988764044944,
"grad_norm": 11.431640625,
"learning_rate": 6.255555555555556e-06,
"loss": 0.2152,
"step": 3500
},
{
"epoch": 2.8491171749598716,
"grad_norm": 17.434322357177734,
"learning_rate": 6.186111111111111e-06,
"loss": 0.189,
"step": 3550
},
{
"epoch": 2.889245585874799,
"grad_norm": 15.394646644592285,
"learning_rate": 6.116666666666668e-06,
"loss": 0.2078,
"step": 3600
},
{
"epoch": 2.929373996789727,
"grad_norm": 19.227445602416992,
"learning_rate": 6.047222222222223e-06,
"loss": 0.2104,
"step": 3650
},
{
"epoch": 2.969502407704655,
"grad_norm": 14.099166870117188,
"learning_rate": 5.977777777777778e-06,
"loss": 0.2252,
"step": 3700
},
{
"epoch": 3.009630818619583,
"grad_norm": 11.261530876159668,
"learning_rate": 5.908333333333334e-06,
"loss": 0.1895,
"step": 3750
},
{
"epoch": 3.0497592295345104,
"grad_norm": 8.676029205322266,
"learning_rate": 5.8388888888888895e-06,
"loss": 0.133,
"step": 3800
},
{
"epoch": 3.0898876404494384,
"grad_norm": 8.65832233428955,
"learning_rate": 5.769444444444445e-06,
"loss": 0.1152,
"step": 3850
},
{
"epoch": 3.130016051364366,
"grad_norm": 9.719037055969238,
"learning_rate": 5.7e-06,
"loss": 0.1261,
"step": 3900
},
{
"epoch": 3.1701444622792936,
"grad_norm": 12.145017623901367,
"learning_rate": 5.630555555555556e-06,
"loss": 0.1216,
"step": 3950
},
{
"epoch": 3.2102728731942216,
"grad_norm": 10.67035961151123,
"learning_rate": 5.561111111111111e-06,
"loss": 0.1341,
"step": 4000
},
{
"epoch": 3.250401284109149,
"grad_norm": 13.00109577178955,
"learning_rate": 5.491666666666668e-06,
"loss": 0.1294,
"step": 4050
},
{
"epoch": 3.290529695024077,
"grad_norm": 34.79175567626953,
"learning_rate": 5.422222222222223e-06,
"loss": 0.1287,
"step": 4100
},
{
"epoch": 3.330658105939005,
"grad_norm": 8.047740936279297,
"learning_rate": 5.352777777777778e-06,
"loss": 0.1282,
"step": 4150
},
{
"epoch": 3.370786516853933,
"grad_norm": 10.46933364868164,
"learning_rate": 5.283333333333333e-06,
"loss": 0.126,
"step": 4200
},
{
"epoch": 3.4109149277688604,
"grad_norm": 9.083063125610352,
"learning_rate": 5.21388888888889e-06,
"loss": 0.1322,
"step": 4250
},
{
"epoch": 3.451043338683788,
"grad_norm": 10.835976600646973,
"learning_rate": 5.144444444444445e-06,
"loss": 0.1393,
"step": 4300
},
{
"epoch": 3.491171749598716,
"grad_norm": 9.417113304138184,
"learning_rate": 5.075e-06,
"loss": 0.1322,
"step": 4350
},
{
"epoch": 3.5313001605136436,
"grad_norm": 11.396635055541992,
"learning_rate": 5.005555555555556e-06,
"loss": 0.1369,
"step": 4400
},
{
"epoch": 3.571428571428571,
"grad_norm": 16.883840560913086,
"learning_rate": 4.9361111111111115e-06,
"loss": 0.1301,
"step": 4450
},
{
"epoch": 3.611556982343499,
"grad_norm": 16.863872528076172,
"learning_rate": 4.866666666666667e-06,
"loss": 0.1383,
"step": 4500
},
{
"epoch": 3.6516853932584272,
"grad_norm": 11.84510612487793,
"learning_rate": 4.797222222222222e-06,
"loss": 0.1288,
"step": 4550
},
{
"epoch": 3.691813804173355,
"grad_norm": 10.211877822875977,
"learning_rate": 4.727777777777779e-06,
"loss": 0.1327,
"step": 4600
},
{
"epoch": 3.7319422150882824,
"grad_norm": 11.919416427612305,
"learning_rate": 4.658333333333333e-06,
"loss": 0.1306,
"step": 4650
},
{
"epoch": 3.7720706260032104,
"grad_norm": 10.668038368225098,
"learning_rate": 4.58888888888889e-06,
"loss": 0.1432,
"step": 4700
},
{
"epoch": 3.812199036918138,
"grad_norm": 9.114903450012207,
"learning_rate": 4.519444444444444e-06,
"loss": 0.1313,
"step": 4750
},
{
"epoch": 3.8523274478330656,
"grad_norm": 9.845243453979492,
"learning_rate": 4.450000000000001e-06,
"loss": 0.1351,
"step": 4800
},
{
"epoch": 3.8924558587479936,
"grad_norm": 10.04245376586914,
"learning_rate": 4.380555555555556e-06,
"loss": 0.1242,
"step": 4850
},
{
"epoch": 3.932584269662921,
"grad_norm": 11.454913139343262,
"learning_rate": 4.3111111111111115e-06,
"loss": 0.1327,
"step": 4900
},
{
"epoch": 3.972712680577849,
"grad_norm": 9.704380989074707,
"learning_rate": 4.241666666666667e-06,
"loss": 0.136,
"step": 4950
},
{
"epoch": 4.012841091492777,
"grad_norm": 8.607508659362793,
"learning_rate": 4.1722222222222225e-06,
"loss": 0.1057,
"step": 5000
},
{
"epoch": 4.052969502407705,
"grad_norm": 9.908164024353027,
"learning_rate": 4.102777777777778e-06,
"loss": 0.0793,
"step": 5050
},
{
"epoch": 4.093097913322633,
"grad_norm": 9.310582160949707,
"learning_rate": 4.033333333333333e-06,
"loss": 0.0819,
"step": 5100
},
{
"epoch": 4.13322632423756,
"grad_norm": 8.358500480651855,
"learning_rate": 3.96388888888889e-06,
"loss": 0.0713,
"step": 5150
},
{
"epoch": 4.173354735152488,
"grad_norm": 10.421764373779297,
"learning_rate": 3.894444444444444e-06,
"loss": 0.0882,
"step": 5200
},
{
"epoch": 4.213483146067416,
"grad_norm": 9.101556777954102,
"learning_rate": 3.825000000000001e-06,
"loss": 0.0795,
"step": 5250
},
{
"epoch": 4.253611556982343,
"grad_norm": 8.120292663574219,
"learning_rate": 3.7555555555555557e-06,
"loss": 0.0837,
"step": 5300
},
{
"epoch": 4.293739967897271,
"grad_norm": 7.72833251953125,
"learning_rate": 3.6861111111111116e-06,
"loss": 0.0834,
"step": 5350
},
{
"epoch": 4.333868378812199,
"grad_norm": 8.289836883544922,
"learning_rate": 3.616666666666667e-06,
"loss": 0.0787,
"step": 5400
},
{
"epoch": 4.373996789727126,
"grad_norm": 11.09408950805664,
"learning_rate": 3.5472222222222225e-06,
"loss": 0.0838,
"step": 5450
},
{
"epoch": 4.414125200642054,
"grad_norm": 5.381303310394287,
"learning_rate": 3.4777777777777784e-06,
"loss": 0.08,
"step": 5500
},
{
"epoch": 4.454253611556982,
"grad_norm": 7.058831214904785,
"learning_rate": 3.4083333333333335e-06,
"loss": 0.0811,
"step": 5550
},
{
"epoch": 4.49438202247191,
"grad_norm": 7.9168620109558105,
"learning_rate": 3.3388888888888893e-06,
"loss": 0.0796,
"step": 5600
},
{
"epoch": 4.534510433386838,
"grad_norm": 8.293107032775879,
"learning_rate": 3.2694444444444444e-06,
"loss": 0.0765,
"step": 5650
},
{
"epoch": 4.574638844301766,
"grad_norm": 14.970193862915039,
"learning_rate": 3.2000000000000003e-06,
"loss": 0.0786,
"step": 5700
},
{
"epoch": 4.614767255216694,
"grad_norm": 7.619186878204346,
"learning_rate": 3.1305555555555557e-06,
"loss": 0.079,
"step": 5750
},
{
"epoch": 4.654895666131621,
"grad_norm": 7.955104827880859,
"learning_rate": 3.0611111111111112e-06,
"loss": 0.0814,
"step": 5800
},
{
"epoch": 4.695024077046549,
"grad_norm": 6.463953971862793,
"learning_rate": 2.991666666666667e-06,
"loss": 0.0846,
"step": 5850
},
{
"epoch": 4.735152487961477,
"grad_norm": 7.264988422393799,
"learning_rate": 2.9222222222222226e-06,
"loss": 0.0846,
"step": 5900
},
{
"epoch": 4.775280898876405,
"grad_norm": 9.477662086486816,
"learning_rate": 2.852777777777778e-06,
"loss": 0.0737,
"step": 5950
},
{
"epoch": 4.815409309791332,
"grad_norm": 7.392651557922363,
"learning_rate": 2.7833333333333335e-06,
"loss": 0.0819,
"step": 6000
},
{
"epoch": 4.85553772070626,
"grad_norm": 7.89952278137207,
"learning_rate": 2.7138888888888894e-06,
"loss": 0.0733,
"step": 6050
},
{
"epoch": 4.895666131621188,
"grad_norm": 9.701936721801758,
"learning_rate": 2.6444444444444444e-06,
"loss": 0.0766,
"step": 6100
},
{
"epoch": 4.935794542536115,
"grad_norm": 9.0658540725708,
"learning_rate": 2.5750000000000003e-06,
"loss": 0.0719,
"step": 6150
},
{
"epoch": 4.975922953451043,
"grad_norm": 8.084450721740723,
"learning_rate": 2.5055555555555554e-06,
"loss": 0.0797,
"step": 6200
},
{
"epoch": 5.016051364365971,
"grad_norm": 9.634577751159668,
"learning_rate": 2.4361111111111113e-06,
"loss": 0.0728,
"step": 6250
},
{
"epoch": 5.056179775280899,
"grad_norm": 6.3712921142578125,
"learning_rate": 2.3666666666666667e-06,
"loss": 0.0492,
"step": 6300
},
{
"epoch": 5.096308186195826,
"grad_norm": 5.370436191558838,
"learning_rate": 2.297222222222222e-06,
"loss": 0.0462,
"step": 6350
},
{
"epoch": 5.136436597110754,
"grad_norm": 5.955212116241455,
"learning_rate": 2.2277777777777777e-06,
"loss": 0.0458,
"step": 6400
},
{
"epoch": 5.176565008025682,
"grad_norm": 5.898159503936768,
"learning_rate": 2.1583333333333336e-06,
"loss": 0.0521,
"step": 6450
},
{
"epoch": 5.21669341894061,
"grad_norm": 4.376798152923584,
"learning_rate": 2.088888888888889e-06,
"loss": 0.0539,
"step": 6500
},
{
"epoch": 5.256821829855538,
"grad_norm": 8.122651100158691,
"learning_rate": 2.0194444444444445e-06,
"loss": 0.0532,
"step": 6550
},
{
"epoch": 5.296950240770466,
"grad_norm": 6.136049270629883,
"learning_rate": 1.9500000000000004e-06,
"loss": 0.049,
"step": 6600
},
{
"epoch": 5.337078651685394,
"grad_norm": 4.084792613983154,
"learning_rate": 1.8805555555555556e-06,
"loss": 0.0606,
"step": 6650
},
{
"epoch": 5.377207062600321,
"grad_norm": 10.023465156555176,
"learning_rate": 1.8111111111111113e-06,
"loss": 0.0528,
"step": 6700
},
{
"epoch": 5.417335473515249,
"grad_norm": 7.965837478637695,
"learning_rate": 1.7416666666666668e-06,
"loss": 0.0495,
"step": 6750
},
{
"epoch": 5.457463884430177,
"grad_norm": 4.469130516052246,
"learning_rate": 1.6722222222222223e-06,
"loss": 0.0498,
"step": 6800
},
{
"epoch": 5.497592295345104,
"grad_norm": 7.078831672668457,
"learning_rate": 1.6027777777777777e-06,
"loss": 0.0553,
"step": 6850
},
{
"epoch": 5.537720706260032,
"grad_norm": 6.6740336418151855,
"learning_rate": 1.5333333333333334e-06,
"loss": 0.0476,
"step": 6900
},
{
"epoch": 5.57784911717496,
"grad_norm": 4.432163715362549,
"learning_rate": 1.463888888888889e-06,
"loss": 0.0482,
"step": 6950
},
{
"epoch": 5.617977528089888,
"grad_norm": 7.402093410491943,
"learning_rate": 1.3944444444444446e-06,
"loss": 0.043,
"step": 7000
},
{
"epoch": 5.658105939004815,
"grad_norm": 5.417418003082275,
"learning_rate": 1.3250000000000002e-06,
"loss": 0.0438,
"step": 7050
},
{
"epoch": 5.698234349919743,
"grad_norm": 5.910097599029541,
"learning_rate": 1.2555555555555557e-06,
"loss": 0.0456,
"step": 7100
},
{
"epoch": 5.738362760834671,
"grad_norm": 7.781131744384766,
"learning_rate": 1.1861111111111112e-06,
"loss": 0.0504,
"step": 7150
},
{
"epoch": 5.778491171749598,
"grad_norm": 7.682769298553467,
"learning_rate": 1.1166666666666666e-06,
"loss": 0.0543,
"step": 7200
},
{
"epoch": 5.818619582664526,
"grad_norm": 5.041004657745361,
"learning_rate": 1.0472222222222223e-06,
"loss": 0.0477,
"step": 7250
},
{
"epoch": 5.858747993579454,
"grad_norm": 5.9667158126831055,
"learning_rate": 9.77777777777778e-07,
"loss": 0.0472,
"step": 7300
},
{
"epoch": 5.898876404494382,
"grad_norm": 7.793878555297852,
"learning_rate": 9.083333333333335e-07,
"loss": 0.0507,
"step": 7350
},
{
"epoch": 5.93900481540931,
"grad_norm": 7.589353561401367,
"learning_rate": 8.388888888888889e-07,
"loss": 0.0588,
"step": 7400
},
{
"epoch": 5.979133226324238,
"grad_norm": 6.20251989364624,
"learning_rate": 7.694444444444445e-07,
"loss": 0.0499,
"step": 7450
},
{
"epoch": 6.019261637239166,
"grad_norm": 6.208334445953369,
"learning_rate": 7.000000000000001e-07,
"loss": 0.0441,
"step": 7500
},
{
"epoch": 6.059390048154093,
"grad_norm": 4.3427324295043945,
"learning_rate": 6.305555555555556e-07,
"loss": 0.0404,
"step": 7550
},
{
"epoch": 6.099518459069021,
"grad_norm": 5.3076581954956055,
"learning_rate": 5.611111111111111e-07,
"loss": 0.0363,
"step": 7600
},
{
"epoch": 6.139646869983949,
"grad_norm": 3.9682233333587646,
"learning_rate": 4.916666666666667e-07,
"loss": 0.0382,
"step": 7650
},
{
"epoch": 6.179775280898877,
"grad_norm": 5.612052917480469,
"learning_rate": 4.2222222222222226e-07,
"loss": 0.0411,
"step": 7700
},
{
"epoch": 6.219903691813804,
"grad_norm": 4.144915580749512,
"learning_rate": 3.527777777777778e-07,
"loss": 0.0358,
"step": 7750
},
{
"epoch": 6.260032102728732,
"grad_norm": 7.28611946105957,
"learning_rate": 2.8333333333333336e-07,
"loss": 0.035,
"step": 7800
},
{
"epoch": 6.30016051364366,
"grad_norm": 4.628722667694092,
"learning_rate": 2.138888888888889e-07,
"loss": 0.0371,
"step": 7850
},
{
"epoch": 6.340288924558587,
"grad_norm": 5.527387619018555,
"learning_rate": 1.4444444444444445e-07,
"loss": 0.0377,
"step": 7900
},
{
"epoch": 6.380417335473515,
"grad_norm": 4.704113006591797,
"learning_rate": 7.500000000000001e-08,
"loss": 0.0352,
"step": 7950
},
{
"epoch": 6.420545746388443,
"grad_norm": 4.203430652618408,
"learning_rate": 5.555555555555556e-09,
"loss": 0.0341,
"step": 8000
}
],
"logging_steps": 50,
"max_steps": 8000,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.38692049199104e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}