{
"best_global_step": 1000,
"best_metric": 168.64974807007812,
"best_model_checkpoint": "./whisper-small-finetuned-multilingual-on-kaggle-v2/checkpoint-1000",
"epoch": 4.509582863585118,
"eval_steps": 1000,
"global_step": 4000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02818489289740699,
"grad_norm": 0.6385474801063538,
"learning_rate": 4.800000000000001e-07,
"loss": 0.436,
"step": 25
},
{
"epoch": 0.05636978579481398,
"grad_norm": 0.364500492811203,
"learning_rate": 9.800000000000001e-07,
"loss": 0.4351,
"step": 50
},
{
"epoch": 0.08455467869222097,
"grad_norm": 0.249020516872406,
"learning_rate": 1.48e-06,
"loss": 0.4622,
"step": 75
},
{
"epoch": 0.11273957158962795,
"grad_norm": 0.6031928062438965,
"learning_rate": 1.98e-06,
"loss": 0.4681,
"step": 100
},
{
"epoch": 0.14092446448703494,
"grad_norm": 0.3203608989715576,
"learning_rate": 2.4800000000000004e-06,
"loss": 0.4639,
"step": 125
},
{
"epoch": 0.16910935738444194,
"grad_norm": 0.6545065641403198,
"learning_rate": 2.9800000000000003e-06,
"loss": 0.4419,
"step": 150
},
{
"epoch": 0.19729425028184894,
"grad_norm": 0.2052975744009018,
"learning_rate": 3.48e-06,
"loss": 0.4438,
"step": 175
},
{
"epoch": 0.2254791431792559,
"grad_norm": 0.6051822304725647,
"learning_rate": 3.980000000000001e-06,
"loss": 0.4034,
"step": 200
},
{
"epoch": 0.25366403607666294,
"grad_norm": 0.6141038537025452,
"learning_rate": 4.48e-06,
"loss": 0.4104,
"step": 225
},
{
"epoch": 0.2818489289740699,
"grad_norm": 0.534954845905304,
"learning_rate": 4.980000000000001e-06,
"loss": 0.4167,
"step": 250
},
{
"epoch": 0.3100338218714769,
"grad_norm": 0.6292374730110168,
"learning_rate": 5.480000000000001e-06,
"loss": 0.387,
"step": 275
},
{
"epoch": 0.3382187147688839,
"grad_norm": 0.5661519765853882,
"learning_rate": 5.98e-06,
"loss": 0.4088,
"step": 300
},
{
"epoch": 0.3664036076662909,
"grad_norm": 0.3687807321548462,
"learning_rate": 6.480000000000001e-06,
"loss": 0.3419,
"step": 325
},
{
"epoch": 0.3945885005636979,
"grad_norm": 0.5748394131660461,
"learning_rate": 6.98e-06,
"loss": 0.3881,
"step": 350
},
{
"epoch": 0.4227733934611049,
"grad_norm": 0.40532639622688293,
"learning_rate": 7.48e-06,
"loss": 0.3589,
"step": 375
},
{
"epoch": 0.4509582863585118,
"grad_norm": 0.2873406410217285,
"learning_rate": 7.980000000000002e-06,
"loss": 0.3263,
"step": 400
},
{
"epoch": 0.4791431792559188,
"grad_norm": 0.3314521908760071,
"learning_rate": 8.48e-06,
"loss": 0.3614,
"step": 425
},
{
"epoch": 0.5073280721533259,
"grad_norm": 0.1603882908821106,
"learning_rate": 8.98e-06,
"loss": 0.2867,
"step": 450
},
{
"epoch": 0.5355129650507328,
"grad_norm": 0.2617914378643036,
"learning_rate": 9.48e-06,
"loss": 0.3226,
"step": 475
},
{
"epoch": 0.5636978579481398,
"grad_norm": 0.16243544220924377,
"learning_rate": 9.980000000000001e-06,
"loss": 0.2966,
"step": 500
},
{
"epoch": 0.5918827508455468,
"grad_norm": 0.21829640865325928,
"learning_rate": 9.946666666666667e-06,
"loss": 0.2815,
"step": 525
},
{
"epoch": 0.6200676437429538,
"grad_norm": 0.30274373292922974,
"learning_rate": 9.891111111111113e-06,
"loss": 0.2956,
"step": 550
},
{
"epoch": 0.6482525366403608,
"grad_norm": 0.5401255488395691,
"learning_rate": 9.835555555555556e-06,
"loss": 0.2772,
"step": 575
},
{
"epoch": 0.6764374295377678,
"grad_norm": 0.596587061882019,
"learning_rate": 9.780000000000001e-06,
"loss": 0.2943,
"step": 600
},
{
"epoch": 0.7046223224351748,
"grad_norm": 0.2570999562740326,
"learning_rate": 9.724444444444445e-06,
"loss": 0.2749,
"step": 625
},
{
"epoch": 0.7328072153325818,
"grad_norm": 0.18179918825626373,
"learning_rate": 9.66888888888889e-06,
"loss": 0.2767,
"step": 650
},
{
"epoch": 0.7609921082299888,
"grad_norm": 0.13621068000793457,
"learning_rate": 9.613333333333335e-06,
"loss": 0.2697,
"step": 675
},
{
"epoch": 0.7891770011273957,
"grad_norm": 0.2660331130027771,
"learning_rate": 9.557777777777777e-06,
"loss": 0.2742,
"step": 700
},
{
"epoch": 0.8173618940248027,
"grad_norm": 0.11947935819625854,
"learning_rate": 9.502222222222223e-06,
"loss": 0.2753,
"step": 725
},
{
"epoch": 0.8455467869222097,
"grad_norm": 0.2724321484565735,
"learning_rate": 9.446666666666667e-06,
"loss": 0.2494,
"step": 750
},
{
"epoch": 0.8737316798196166,
"grad_norm": 0.11410418152809143,
"learning_rate": 9.391111111111111e-06,
"loss": 0.264,
"step": 775
},
{
"epoch": 0.9019165727170236,
"grad_norm": 0.13550838828086853,
"learning_rate": 9.335555555555557e-06,
"loss": 0.2611,
"step": 800
},
{
"epoch": 0.9301014656144306,
"grad_norm": 1.507051706314087,
"learning_rate": 9.280000000000001e-06,
"loss": 0.2748,
"step": 825
},
{
"epoch": 0.9582863585118376,
"grad_norm": 0.11221399158239365,
"learning_rate": 9.224444444444445e-06,
"loss": 0.2972,
"step": 850
},
{
"epoch": 0.9864712514092446,
"grad_norm": 0.705410361289978,
"learning_rate": 9.168888888888889e-06,
"loss": 0.2527,
"step": 875
},
{
"epoch": 1.0146561443066517,
"grad_norm": 0.13268794119358063,
"learning_rate": 9.113333333333335e-06,
"loss": 0.2646,
"step": 900
},
{
"epoch": 1.0428410372040586,
"grad_norm": 1.25346839427948,
"learning_rate": 9.057777777777779e-06,
"loss": 0.265,
"step": 925
},
{
"epoch": 1.0710259301014655,
"grad_norm": 1.471956491470337,
"learning_rate": 9.002222222222223e-06,
"loss": 0.2856,
"step": 950
},
{
"epoch": 1.0992108229988726,
"grad_norm": 0.13319148123264313,
"learning_rate": 8.946666666666669e-06,
"loss": 0.2552,
"step": 975
},
{
"epoch": 1.1273957158962795,
"grad_norm": 0.7707216143608093,
"learning_rate": 8.891111111111111e-06,
"loss": 0.2825,
"step": 1000
},
{
"epoch": 1.1273957158962795,
"eval_loss": 1.5161501169204712,
"eval_runtime": 3096.2186,
"eval_samples_per_second": 0.541,
"eval_steps_per_second": 0.017,
"eval_wer": 168.64974807007812,
"step": 1000
},
{
"epoch": 1.1555806087936866,
"grad_norm": 1.633041262626648,
"learning_rate": 8.835555555555557e-06,
"loss": 0.2616,
"step": 1025
},
{
"epoch": 1.1837655016910935,
"grad_norm": 0.19760851562023163,
"learning_rate": 8.78e-06,
"loss": 0.2414,
"step": 1050
},
{
"epoch": 1.2119503945885006,
"grad_norm": 0.6687982678413391,
"learning_rate": 8.724444444444445e-06,
"loss": 0.2686,
"step": 1075
},
{
"epoch": 1.2401352874859075,
"grad_norm": 0.09788186848163605,
"learning_rate": 8.66888888888889e-06,
"loss": 0.2436,
"step": 1100
},
{
"epoch": 1.2683201803833146,
"grad_norm": 0.3376760482788086,
"learning_rate": 8.613333333333333e-06,
"loss": 0.2455,
"step": 1125
},
{
"epoch": 1.2965050732807215,
"grad_norm": 0.16203612089157104,
"learning_rate": 8.557777777777778e-06,
"loss": 0.2656,
"step": 1150
},
{
"epoch": 1.3246899661781284,
"grad_norm": 0.12653979659080505,
"learning_rate": 8.502222222222223e-06,
"loss": 0.2428,
"step": 1175
},
{
"epoch": 1.3528748590755355,
"grad_norm": 0.12609504163265228,
"learning_rate": 8.446666666666668e-06,
"loss": 0.2584,
"step": 1200
},
{
"epoch": 1.3810597519729426,
"grad_norm": 0.6176909804344177,
"learning_rate": 8.391111111111112e-06,
"loss": 0.2551,
"step": 1225
},
{
"epoch": 1.4092446448703495,
"grad_norm": 0.14362627267837524,
"learning_rate": 8.335555555555556e-06,
"loss": 0.2623,
"step": 1250
},
{
"epoch": 1.4374295377677564,
"grad_norm": 0.11503814905881882,
"learning_rate": 8.28e-06,
"loss": 0.2581,
"step": 1275
},
{
"epoch": 1.4656144306651635,
"grad_norm": 0.13389258086681366,
"learning_rate": 8.224444444444444e-06,
"loss": 0.2507,
"step": 1300
},
{
"epoch": 1.4937993235625704,
"grad_norm": 1.0773035287857056,
"learning_rate": 8.16888888888889e-06,
"loss": 0.2409,
"step": 1325
},
{
"epoch": 1.5219842164599775,
"grad_norm": 0.6458689570426941,
"learning_rate": 8.113333333333334e-06,
"loss": 0.24,
"step": 1350
},
{
"epoch": 1.5501691093573844,
"grad_norm": 0.11144981533288956,
"learning_rate": 8.057777777777778e-06,
"loss": 0.2624,
"step": 1375
},
{
"epoch": 1.5783540022547915,
"grad_norm": 0.963024914264679,
"learning_rate": 8.002222222222222e-06,
"loss": 0.2381,
"step": 1400
},
{
"epoch": 1.6065388951521984,
"grad_norm": 2.91090989112854,
"learning_rate": 7.946666666666666e-06,
"loss": 0.2548,
"step": 1425
},
{
"epoch": 1.6347237880496053,
"grad_norm": 0.5699247717857361,
"learning_rate": 7.891111111111112e-06,
"loss": 0.273,
"step": 1450
},
{
"epoch": 1.6629086809470124,
"grad_norm": 0.11966383457183838,
"learning_rate": 7.835555555555556e-06,
"loss": 0.2538,
"step": 1475
},
{
"epoch": 1.6910935738444195,
"grad_norm": 0.49657556414604187,
"learning_rate": 7.78e-06,
"loss": 0.2446,
"step": 1500
},
{
"epoch": 1.7192784667418264,
"grad_norm": 0.21423515677452087,
"learning_rate": 7.724444444444446e-06,
"loss": 0.2365,
"step": 1525
},
{
"epoch": 1.7474633596392333,
"grad_norm": 0.3403068482875824,
"learning_rate": 7.66888888888889e-06,
"loss": 0.2382,
"step": 1550
},
{
"epoch": 1.7756482525366404,
"grad_norm": 0.12040483951568604,
"learning_rate": 7.613333333333334e-06,
"loss": 0.2433,
"step": 1575
},
{
"epoch": 1.8038331454340475,
"grad_norm": 0.1376182585954666,
"learning_rate": 7.557777777777779e-06,
"loss": 0.251,
"step": 1600
},
{
"epoch": 1.8320180383314544,
"grad_norm": 0.4510025382041931,
"learning_rate": 7.502222222222223e-06,
"loss": 0.2285,
"step": 1625
},
{
"epoch": 1.8602029312288613,
"grad_norm": 0.19896015524864197,
"learning_rate": 7.446666666666668e-06,
"loss": 0.2386,
"step": 1650
},
{
"epoch": 1.8883878241262684,
"grad_norm": 0.12030182778835297,
"learning_rate": 7.3911111111111125e-06,
"loss": 0.2544,
"step": 1675
},
{
"epoch": 1.9165727170236753,
"grad_norm": 0.13691359758377075,
"learning_rate": 7.335555555555556e-06,
"loss": 0.2426,
"step": 1700
},
{
"epoch": 1.9447576099210822,
"grad_norm": 0.1743856519460678,
"learning_rate": 7.280000000000001e-06,
"loss": 0.2532,
"step": 1725
},
{
"epoch": 1.9729425028184893,
"grad_norm": 0.19390033185482025,
"learning_rate": 7.224444444444445e-06,
"loss": 0.2397,
"step": 1750
},
{
"epoch": 2.0011273957158964,
"grad_norm": 0.22048410773277283,
"learning_rate": 7.1688888888888895e-06,
"loss": 0.2398,
"step": 1775
},
{
"epoch": 2.0293122886133035,
"grad_norm": 0.12077363580465317,
"learning_rate": 7.113333333333334e-06,
"loss": 0.2354,
"step": 1800
},
{
"epoch": 2.05749718151071,
"grad_norm": 0.6927027106285095,
"learning_rate": 7.057777777777778e-06,
"loss": 0.241,
"step": 1825
},
{
"epoch": 2.0856820744081173,
"grad_norm": 0.1784224808216095,
"learning_rate": 7.0022222222222225e-06,
"loss": 0.2477,
"step": 1850
},
{
"epoch": 2.1138669673055244,
"grad_norm": 0.13008733093738556,
"learning_rate": 6.946666666666667e-06,
"loss": 0.2634,
"step": 1875
},
{
"epoch": 2.142051860202931,
"grad_norm": 0.11152646690607071,
"learning_rate": 6.891111111111111e-06,
"loss": 0.2198,
"step": 1900
},
{
"epoch": 2.170236753100338,
"grad_norm": 0.15575166046619415,
"learning_rate": 6.835555555555556e-06,
"loss": 0.2545,
"step": 1925
},
{
"epoch": 2.1984216459977453,
"grad_norm": 0.13412870466709137,
"learning_rate": 6.780000000000001e-06,
"loss": 0.2353,
"step": 1950
},
{
"epoch": 2.2266065388951524,
"grad_norm": 0.1718331277370453,
"learning_rate": 6.724444444444444e-06,
"loss": 0.2519,
"step": 1975
},
{
"epoch": 2.254791431792559,
"grad_norm": 0.17061075568199158,
"learning_rate": 6.668888888888889e-06,
"loss": 0.2179,
"step": 2000
},
{
"epoch": 2.254791431792559,
"eval_loss": 1.4350669384002686,
"eval_runtime": 3183.8495,
"eval_samples_per_second": 0.526,
"eval_steps_per_second": 0.017,
"eval_wer": 179.06901493089262,
"step": 2000
},
{
"epoch": 2.282976324689966,
"grad_norm": 0.12167395651340485,
"learning_rate": 6.613333333333334e-06,
"loss": 0.2362,
"step": 2025
},
{
"epoch": 2.3111612175873733,
"grad_norm": 0.14669859409332275,
"learning_rate": 6.557777777777778e-06,
"loss": 0.2337,
"step": 2050
},
{
"epoch": 2.3393461104847804,
"grad_norm": 0.8504271507263184,
"learning_rate": 6.502222222222223e-06,
"loss": 0.2364,
"step": 2075
},
{
"epoch": 2.367531003382187,
"grad_norm": 0.09018735587596893,
"learning_rate": 6.446666666666668e-06,
"loss": 0.2449,
"step": 2100
},
{
"epoch": 2.395715896279594,
"grad_norm": 1.927909255027771,
"learning_rate": 6.391111111111111e-06,
"loss": 0.2374,
"step": 2125
},
{
"epoch": 2.4239007891770012,
"grad_norm": 0.1180146113038063,
"learning_rate": 6.335555555555556e-06,
"loss": 0.2339,
"step": 2150
},
{
"epoch": 2.452085682074408,
"grad_norm": 0.14500541985034943,
"learning_rate": 6.280000000000001e-06,
"loss": 0.2677,
"step": 2175
},
{
"epoch": 2.480270574971815,
"grad_norm": 0.10550981760025024,
"learning_rate": 6.224444444444445e-06,
"loss": 0.2435,
"step": 2200
},
{
"epoch": 2.508455467869222,
"grad_norm": 0.8889521360397339,
"learning_rate": 6.16888888888889e-06,
"loss": 0.2498,
"step": 2225
},
{
"epoch": 2.5366403607666292,
"grad_norm": 2.0345895290374756,
"learning_rate": 6.113333333333333e-06,
"loss": 0.2339,
"step": 2250
},
{
"epoch": 2.5648252536640364,
"grad_norm": 0.13331718742847443,
"learning_rate": 6.057777777777778e-06,
"loss": 0.241,
"step": 2275
},
{
"epoch": 2.593010146561443,
"grad_norm": 1.2904160022735596,
"learning_rate": 6.002222222222223e-06,
"loss": 0.2518,
"step": 2300
},
{
"epoch": 2.62119503945885,
"grad_norm": 0.11779336631298065,
"learning_rate": 5.946666666666668e-06,
"loss": 0.236,
"step": 2325
},
{
"epoch": 2.649379932356257,
"grad_norm": 0.11726631969213486,
"learning_rate": 5.891111111111112e-06,
"loss": 0.2335,
"step": 2350
},
{
"epoch": 2.677564825253664,
"grad_norm": 0.12784917652606964,
"learning_rate": 5.8355555555555565e-06,
"loss": 0.2315,
"step": 2375
},
{
"epoch": 2.705749718151071,
"grad_norm": 0.1314825415611267,
"learning_rate": 5.78e-06,
"loss": 0.235,
"step": 2400
},
{
"epoch": 2.733934611048478,
"grad_norm": 0.21480953693389893,
"learning_rate": 5.724444444444445e-06,
"loss": 0.2367,
"step": 2425
},
{
"epoch": 2.7621195039458852,
"grad_norm": 0.11732250452041626,
"learning_rate": 5.6688888888888895e-06,
"loss": 0.2336,
"step": 2450
},
{
"epoch": 2.790304396843292,
"grad_norm": 0.11237554252147675,
"learning_rate": 5.613333333333334e-06,
"loss": 0.2501,
"step": 2475
},
{
"epoch": 2.818489289740699,
"grad_norm": 0.12970662117004395,
"learning_rate": 5.557777777777778e-06,
"loss": 0.2266,
"step": 2500
},
{
"epoch": 2.846674182638106,
"grad_norm": 0.19031056761741638,
"learning_rate": 5.5022222222222224e-06,
"loss": 0.242,
"step": 2525
},
{
"epoch": 2.874859075535513,
"grad_norm": 0.16265034675598145,
"learning_rate": 5.4466666666666665e-06,
"loss": 0.2278,
"step": 2550
},
{
"epoch": 2.90304396843292,
"grad_norm": 0.27451595664024353,
"learning_rate": 5.391111111111111e-06,
"loss": 0.2493,
"step": 2575
},
{
"epoch": 2.931228861330327,
"grad_norm": 0.17422199249267578,
"learning_rate": 5.335555555555556e-06,
"loss": 0.2407,
"step": 2600
},
{
"epoch": 2.959413754227734,
"grad_norm": 0.11225639283657074,
"learning_rate": 5.28e-06,
"loss": 0.221,
"step": 2625
},
{
"epoch": 2.987598647125141,
"grad_norm": 0.11298167705535889,
"learning_rate": 5.224444444444445e-06,
"loss": 0.2253,
"step": 2650
},
{
"epoch": 3.015783540022548,
"grad_norm": 0.13156744837760925,
"learning_rate": 5.168888888888889e-06,
"loss": 0.2494,
"step": 2675
},
{
"epoch": 3.043968432919955,
"grad_norm": 2.454240083694458,
"learning_rate": 5.113333333333333e-06,
"loss": 0.2615,
"step": 2700
},
{
"epoch": 3.0721533258173617,
"grad_norm": 0.11773987859487534,
"learning_rate": 5.057777777777778e-06,
"loss": 0.2292,
"step": 2725
},
{
"epoch": 3.100338218714769,
"grad_norm": 2.4229350090026855,
"learning_rate": 5.002222222222223e-06,
"loss": 0.2492,
"step": 2750
},
{
"epoch": 3.128523111612176,
"grad_norm": 0.13231739401817322,
"learning_rate": 4.946666666666667e-06,
"loss": 0.2182,
"step": 2775
},
{
"epoch": 3.156708004509583,
"grad_norm": 0.11936808377504349,
"learning_rate": 4.891111111111111e-06,
"loss": 0.2279,
"step": 2800
},
{
"epoch": 3.1848928974069897,
"grad_norm": 3.4271562099456787,
"learning_rate": 4.835555555555556e-06,
"loss": 0.2313,
"step": 2825
},
{
"epoch": 3.2130777903043968,
"grad_norm": 0.13328012824058533,
"learning_rate": 4.78e-06,
"loss": 0.2176,
"step": 2850
},
{
"epoch": 3.241262683201804,
"grad_norm": 0.15237456560134888,
"learning_rate": 4.724444444444445e-06,
"loss": 0.2397,
"step": 2875
},
{
"epoch": 3.269447576099211,
"grad_norm": 0.13094407320022583,
"learning_rate": 4.66888888888889e-06,
"loss": 0.2509,
"step": 2900
},
{
"epoch": 3.2976324689966177,
"grad_norm": 0.14969752728939056,
"learning_rate": 4.613333333333334e-06,
"loss": 0.2281,
"step": 2925
},
{
"epoch": 3.3258173618940248,
"grad_norm": 0.2272210568189621,
"learning_rate": 4.557777777777778e-06,
"loss": 0.2539,
"step": 2950
},
{
"epoch": 3.354002254791432,
"grad_norm": 0.1374891996383667,
"learning_rate": 4.502222222222223e-06,
"loss": 0.2249,
"step": 2975
},
{
"epoch": 3.382187147688839,
"grad_norm": 0.14481812715530396,
"learning_rate": 4.446666666666667e-06,
"loss": 0.2385,
"step": 3000
},
{
"epoch": 3.382187147688839,
"eval_loss": 1.3986942768096924,
"eval_runtime": 3116.7262,
"eval_samples_per_second": 0.538,
"eval_steps_per_second": 0.017,
"eval_wer": 183.03517773771554,
"step": 3000
},
{
"epoch": 3.4103720405862457,
"grad_norm": 0.14197325706481934,
"learning_rate": 4.391111111111112e-06,
"loss": 0.2263,
"step": 3025
},
{
"epoch": 3.4385569334836528,
"grad_norm": 2.5543010234832764,
"learning_rate": 4.3355555555555565e-06,
"loss": 0.2168,
"step": 3050
},
{
"epoch": 3.46674182638106,
"grad_norm": 0.3014455735683441,
"learning_rate": 4.2800000000000005e-06,
"loss": 0.2369,
"step": 3075
},
{
"epoch": 3.4949267192784665,
"grad_norm": 0.11696666479110718,
"learning_rate": 4.2244444444444446e-06,
"loss": 0.229,
"step": 3100
},
{
"epoch": 3.5231116121758737,
"grad_norm": 0.14176321029663086,
"learning_rate": 4.168888888888889e-06,
"loss": 0.2215,
"step": 3125
},
{
"epoch": 3.5512965050732808,
"grad_norm": 2.6928396224975586,
"learning_rate": 4.1133333333333335e-06,
"loss": 0.2185,
"step": 3150
},
{
"epoch": 3.579481397970688,
"grad_norm": 0.5729812383651733,
"learning_rate": 4.057777777777778e-06,
"loss": 0.2259,
"step": 3175
},
{
"epoch": 3.6076662908680945,
"grad_norm": 0.33455201983451843,
"learning_rate": 4.002222222222222e-06,
"loss": 0.2186,
"step": 3200
},
{
"epoch": 3.6358511837655016,
"grad_norm": 0.16064058244228363,
"learning_rate": 3.946666666666667e-06,
"loss": 0.2348,
"step": 3225
},
{
"epoch": 3.6640360766629088,
"grad_norm": 0.13294631242752075,
"learning_rate": 3.891111111111111e-06,
"loss": 0.2484,
"step": 3250
},
{
"epoch": 3.6922209695603154,
"grad_norm": 0.6433837413787842,
"learning_rate": 3.835555555555555e-06,
"loss": 0.2384,
"step": 3275
},
{
"epoch": 3.7204058624577225,
"grad_norm": 0.14757393300533295,
"learning_rate": 3.7800000000000002e-06,
"loss": 0.231,
"step": 3300
},
{
"epoch": 3.7485907553551296,
"grad_norm": 0.34130287170410156,
"learning_rate": 3.724444444444445e-06,
"loss": 0.2477,
"step": 3325
},
{
"epoch": 3.7767756482525368,
"grad_norm": 0.12332061678171158,
"learning_rate": 3.668888888888889e-06,
"loss": 0.2437,
"step": 3350
},
{
"epoch": 3.804960541149944,
"grad_norm": 0.1419714242219925,
"learning_rate": 3.6133333333333336e-06,
"loss": 0.2223,
"step": 3375
},
{
"epoch": 3.8331454340473505,
"grad_norm": 0.14998725056648254,
"learning_rate": 3.5577777777777785e-06,
"loss": 0.2322,
"step": 3400
},
{
"epoch": 3.8613303269447576,
"grad_norm": 0.14752830564975739,
"learning_rate": 3.5022222222222225e-06,
"loss": 0.2419,
"step": 3425
},
{
"epoch": 3.8895152198421648,
"grad_norm": 0.19776736199855804,
"learning_rate": 3.446666666666667e-06,
"loss": 0.2214,
"step": 3450
},
{
"epoch": 3.9177001127395714,
"grad_norm": 0.12292192131280899,
"learning_rate": 3.391111111111111e-06,
"loss": 0.2327,
"step": 3475
},
{
"epoch": 3.9458850056369785,
"grad_norm": 0.12656843662261963,
"learning_rate": 3.335555555555556e-06,
"loss": 0.2244,
"step": 3500
},
{
"epoch": 3.9740698985343856,
"grad_norm": 0.14774179458618164,
"learning_rate": 3.2800000000000004e-06,
"loss": 0.2163,
"step": 3525
},
{
"epoch": 4.002254791431793,
"grad_norm": 0.1358705759048462,
"learning_rate": 3.2244444444444444e-06,
"loss": 0.2274,
"step": 3550
},
{
"epoch": 4.0304396843292,
"grad_norm": 0.14576297998428345,
"learning_rate": 3.1688888888888893e-06,
"loss": 0.2239,
"step": 3575
},
{
"epoch": 4.058624577226607,
"grad_norm": 0.13178740441799164,
"learning_rate": 3.1133333333333337e-06,
"loss": 0.22,
"step": 3600
},
{
"epoch": 4.086809470124013,
"grad_norm": 0.7160453200340271,
"learning_rate": 3.0577777777777778e-06,
"loss": 0.2244,
"step": 3625
},
{
"epoch": 4.11499436302142,
"grad_norm": 0.5130426287651062,
"learning_rate": 3.0022222222222227e-06,
"loss": 0.2194,
"step": 3650
},
{
"epoch": 4.143179255918827,
"grad_norm": 0.14089062809944153,
"learning_rate": 2.946666666666667e-06,
"loss": 0.2381,
"step": 3675
},
{
"epoch": 4.1713641488162345,
"grad_norm": 3.9715943336486816,
"learning_rate": 2.891111111111111e-06,
"loss": 0.2209,
"step": 3700
},
{
"epoch": 4.199549041713642,
"grad_norm": 0.5900473594665527,
"learning_rate": 2.835555555555556e-06,
"loss": 0.2273,
"step": 3725
},
{
"epoch": 4.227733934611049,
"grad_norm": 0.13557352125644684,
"learning_rate": 2.7800000000000005e-06,
"loss": 0.2112,
"step": 3750
},
{
"epoch": 4.255918827508456,
"grad_norm": 0.17545416951179504,
"learning_rate": 2.7244444444444445e-06,
"loss": 0.2403,
"step": 3775
},
{
"epoch": 4.284103720405862,
"grad_norm": 0.17527706921100616,
"learning_rate": 2.6688888888888894e-06,
"loss": 0.2182,
"step": 3800
},
{
"epoch": 4.312288613303269,
"grad_norm": 0.12308468669652939,
"learning_rate": 2.6133333333333334e-06,
"loss": 0.2297,
"step": 3825
},
{
"epoch": 4.340473506200676,
"grad_norm": 0.1240859255194664,
"learning_rate": 2.557777777777778e-06,
"loss": 0.2391,
"step": 3850
},
{
"epoch": 4.368658399098083,
"grad_norm": 0.24992740154266357,
"learning_rate": 2.5022222222222224e-06,
"loss": 0.2386,
"step": 3875
},
{
"epoch": 4.3968432919954905,
"grad_norm": 0.15345261991024017,
"learning_rate": 2.446666666666667e-06,
"loss": 0.2223,
"step": 3900
},
{
"epoch": 4.425028184892898,
"grad_norm": 0.10449715703725815,
"learning_rate": 2.3911111111111113e-06,
"loss": 0.2794,
"step": 3925
},
{
"epoch": 4.453213077790305,
"grad_norm": 0.12438195198774338,
"learning_rate": 2.3355555555555557e-06,
"loss": 0.2313,
"step": 3950
},
{
"epoch": 4.481397970687711,
"grad_norm": 0.11366376280784607,
"learning_rate": 2.28e-06,
"loss": 0.2357,
"step": 3975
},
{
"epoch": 4.509582863585118,
"grad_norm": 0.12389075011014938,
"learning_rate": 2.2244444444444447e-06,
"loss": 0.24,
"step": 4000
},
{
"epoch": 4.509582863585118,
"eval_loss": 1.378143548965454,
"eval_runtime": 3077.3495,
"eval_samples_per_second": 0.545,
"eval_steps_per_second": 0.017,
"eval_wer": 176.00887532935792,
"step": 4000
}
],
"logging_steps": 25,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.854868401487872e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}