{ "best_global_step": 1000, "best_metric": 168.64974807007812, "best_model_checkpoint": "./whisper-small-finetuned-multilingual-on-kaggle-v2/checkpoint-1000", "epoch": 4.509582863585118, "eval_steps": 1000, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02818489289740699, "grad_norm": 0.6385474801063538, "learning_rate": 4.800000000000001e-07, "loss": 0.436, "step": 25 }, { "epoch": 0.05636978579481398, "grad_norm": 0.364500492811203, "learning_rate": 9.800000000000001e-07, "loss": 0.4351, "step": 50 }, { "epoch": 0.08455467869222097, "grad_norm": 0.249020516872406, "learning_rate": 1.48e-06, "loss": 0.4622, "step": 75 }, { "epoch": 0.11273957158962795, "grad_norm": 0.6031928062438965, "learning_rate": 1.98e-06, "loss": 0.4681, "step": 100 }, { "epoch": 0.14092446448703494, "grad_norm": 0.3203608989715576, "learning_rate": 2.4800000000000004e-06, "loss": 0.4639, "step": 125 }, { "epoch": 0.16910935738444194, "grad_norm": 0.6545065641403198, "learning_rate": 2.9800000000000003e-06, "loss": 0.4419, "step": 150 }, { "epoch": 0.19729425028184894, "grad_norm": 0.2052975744009018, "learning_rate": 3.48e-06, "loss": 0.4438, "step": 175 }, { "epoch": 0.2254791431792559, "grad_norm": 0.6051822304725647, "learning_rate": 3.980000000000001e-06, "loss": 0.4034, "step": 200 }, { "epoch": 0.25366403607666294, "grad_norm": 0.6141038537025452, "learning_rate": 4.48e-06, "loss": 0.4104, "step": 225 }, { "epoch": 0.2818489289740699, "grad_norm": 0.534954845905304, "learning_rate": 4.980000000000001e-06, "loss": 0.4167, "step": 250 }, { "epoch": 0.3100338218714769, "grad_norm": 0.6292374730110168, "learning_rate": 5.480000000000001e-06, "loss": 0.387, "step": 275 }, { "epoch": 0.3382187147688839, "grad_norm": 0.5661519765853882, "learning_rate": 5.98e-06, "loss": 0.4088, "step": 300 }, { "epoch": 0.3664036076662909, "grad_norm": 0.3687807321548462, "learning_rate": 6.480000000000001e-06, "loss": 0.3419, "step": 325 }, { "epoch": 0.3945885005636979, "grad_norm": 0.5748394131660461, "learning_rate": 6.98e-06, "loss": 0.3881, "step": 350 }, { "epoch": 0.4227733934611049, "grad_norm": 0.40532639622688293, "learning_rate": 7.48e-06, "loss": 0.3589, "step": 375 }, { "epoch": 0.4509582863585118, "grad_norm": 0.2873406410217285, "learning_rate": 7.980000000000002e-06, "loss": 0.3263, "step": 400 }, { "epoch": 0.4791431792559188, "grad_norm": 0.3314521908760071, "learning_rate": 8.48e-06, "loss": 0.3614, "step": 425 }, { "epoch": 0.5073280721533259, "grad_norm": 0.1603882908821106, "learning_rate": 8.98e-06, "loss": 0.2867, "step": 450 }, { "epoch": 0.5355129650507328, "grad_norm": 0.2617914378643036, "learning_rate": 9.48e-06, "loss": 0.3226, "step": 475 }, { "epoch": 0.5636978579481398, "grad_norm": 0.16243544220924377, "learning_rate": 9.980000000000001e-06, "loss": 0.2966, "step": 500 }, { "epoch": 0.5918827508455468, "grad_norm": 0.21829640865325928, "learning_rate": 9.946666666666667e-06, "loss": 0.2815, "step": 525 }, { "epoch": 0.6200676437429538, "grad_norm": 0.30274373292922974, "learning_rate": 9.891111111111113e-06, "loss": 0.2956, "step": 550 }, { "epoch": 0.6482525366403608, "grad_norm": 0.5401255488395691, "learning_rate": 9.835555555555556e-06, "loss": 0.2772, "step": 575 }, { "epoch": 0.6764374295377678, "grad_norm": 0.596587061882019, "learning_rate": 9.780000000000001e-06, "loss": 0.2943, "step": 600 }, { "epoch": 0.7046223224351748, "grad_norm": 0.2570999562740326, "learning_rate": 9.724444444444445e-06, "loss": 0.2749, "step": 625 }, { "epoch": 0.7328072153325818, "grad_norm": 0.18179918825626373, "learning_rate": 9.66888888888889e-06, "loss": 0.2767, "step": 650 }, { "epoch": 0.7609921082299888, "grad_norm": 0.13621068000793457, "learning_rate": 9.613333333333335e-06, "loss": 0.2697, "step": 675 }, { "epoch": 0.7891770011273957, "grad_norm": 0.2660331130027771, "learning_rate": 9.557777777777777e-06, "loss": 0.2742, "step": 700 }, { "epoch": 0.8173618940248027, "grad_norm": 0.11947935819625854, "learning_rate": 9.502222222222223e-06, "loss": 0.2753, "step": 725 }, { "epoch": 0.8455467869222097, "grad_norm": 0.2724321484565735, "learning_rate": 9.446666666666667e-06, "loss": 0.2494, "step": 750 }, { "epoch": 0.8737316798196166, "grad_norm": 0.11410418152809143, "learning_rate": 9.391111111111111e-06, "loss": 0.264, "step": 775 }, { "epoch": 0.9019165727170236, "grad_norm": 0.13550838828086853, "learning_rate": 9.335555555555557e-06, "loss": 0.2611, "step": 800 }, { "epoch": 0.9301014656144306, "grad_norm": 1.507051706314087, "learning_rate": 9.280000000000001e-06, "loss": 0.2748, "step": 825 }, { "epoch": 0.9582863585118376, "grad_norm": 0.11221399158239365, "learning_rate": 9.224444444444445e-06, "loss": 0.2972, "step": 850 }, { "epoch": 0.9864712514092446, "grad_norm": 0.705410361289978, "learning_rate": 9.168888888888889e-06, "loss": 0.2527, "step": 875 }, { "epoch": 1.0146561443066517, "grad_norm": 0.13268794119358063, "learning_rate": 9.113333333333335e-06, "loss": 0.2646, "step": 900 }, { "epoch": 1.0428410372040586, "grad_norm": 1.25346839427948, "learning_rate": 9.057777777777779e-06, "loss": 0.265, "step": 925 }, { "epoch": 1.0710259301014655, "grad_norm": 1.471956491470337, "learning_rate": 9.002222222222223e-06, "loss": 0.2856, "step": 950 }, { "epoch": 1.0992108229988726, "grad_norm": 0.13319148123264313, "learning_rate": 8.946666666666669e-06, "loss": 0.2552, "step": 975 }, { "epoch": 1.1273957158962795, "grad_norm": 0.7707216143608093, "learning_rate": 8.891111111111111e-06, "loss": 0.2825, "step": 1000 }, { "epoch": 1.1273957158962795, "eval_loss": 1.5161501169204712, "eval_runtime": 3096.2186, "eval_samples_per_second": 0.541, "eval_steps_per_second": 0.017, "eval_wer": 168.64974807007812, "step": 1000 }, { "epoch": 1.1555806087936866, "grad_norm": 1.633041262626648, "learning_rate": 8.835555555555557e-06, "loss": 0.2616, "step": 1025 }, { "epoch": 1.1837655016910935, "grad_norm": 0.19760851562023163, "learning_rate": 8.78e-06, "loss": 0.2414, "step": 1050 }, { "epoch": 1.2119503945885006, "grad_norm": 0.6687982678413391, "learning_rate": 8.724444444444445e-06, "loss": 0.2686, "step": 1075 }, { "epoch": 1.2401352874859075, "grad_norm": 0.09788186848163605, "learning_rate": 8.66888888888889e-06, "loss": 0.2436, "step": 1100 }, { "epoch": 1.2683201803833146, "grad_norm": 0.3376760482788086, "learning_rate": 8.613333333333333e-06, "loss": 0.2455, "step": 1125 }, { "epoch": 1.2965050732807215, "grad_norm": 0.16203612089157104, "learning_rate": 8.557777777777778e-06, "loss": 0.2656, "step": 1150 }, { "epoch": 1.3246899661781284, "grad_norm": 0.12653979659080505, "learning_rate": 8.502222222222223e-06, "loss": 0.2428, "step": 1175 }, { "epoch": 1.3528748590755355, "grad_norm": 0.12609504163265228, "learning_rate": 8.446666666666668e-06, "loss": 0.2584, "step": 1200 }, { "epoch": 1.3810597519729426, "grad_norm": 0.6176909804344177, "learning_rate": 8.391111111111112e-06, "loss": 0.2551, "step": 1225 }, { "epoch": 1.4092446448703495, "grad_norm": 0.14362627267837524, "learning_rate": 8.335555555555556e-06, "loss": 0.2623, "step": 1250 }, { "epoch": 1.4374295377677564, "grad_norm": 0.11503814905881882, "learning_rate": 8.28e-06, "loss": 0.2581, "step": 1275 }, { "epoch": 1.4656144306651635, "grad_norm": 0.13389258086681366, "learning_rate": 8.224444444444444e-06, "loss": 0.2507, "step": 1300 }, { "epoch": 1.4937993235625704, "grad_norm": 1.0773035287857056, "learning_rate": 8.16888888888889e-06, "loss": 0.2409, "step": 1325 }, { "epoch": 1.5219842164599775, "grad_norm": 0.6458689570426941, "learning_rate": 8.113333333333334e-06, "loss": 0.24, "step": 1350 }, { "epoch": 1.5501691093573844, "grad_norm": 0.11144981533288956, "learning_rate": 8.057777777777778e-06, "loss": 0.2624, "step": 1375 }, { "epoch": 1.5783540022547915, "grad_norm": 0.963024914264679, "learning_rate": 8.002222222222222e-06, "loss": 0.2381, "step": 1400 }, { "epoch": 1.6065388951521984, "grad_norm": 2.91090989112854, "learning_rate": 7.946666666666666e-06, "loss": 0.2548, "step": 1425 }, { "epoch": 1.6347237880496053, "grad_norm": 0.5699247717857361, "learning_rate": 7.891111111111112e-06, "loss": 0.273, "step": 1450 }, { "epoch": 1.6629086809470124, "grad_norm": 0.11966383457183838, "learning_rate": 7.835555555555556e-06, "loss": 0.2538, "step": 1475 }, { "epoch": 1.6910935738444195, "grad_norm": 0.49657556414604187, "learning_rate": 7.78e-06, "loss": 0.2446, "step": 1500 }, { "epoch": 1.7192784667418264, "grad_norm": 0.21423515677452087, "learning_rate": 7.724444444444446e-06, "loss": 0.2365, "step": 1525 }, { "epoch": 1.7474633596392333, "grad_norm": 0.3403068482875824, "learning_rate": 7.66888888888889e-06, "loss": 0.2382, "step": 1550 }, { "epoch": 1.7756482525366404, "grad_norm": 0.12040483951568604, "learning_rate": 7.613333333333334e-06, "loss": 0.2433, "step": 1575 }, { "epoch": 1.8038331454340475, "grad_norm": 0.1376182585954666, "learning_rate": 7.557777777777779e-06, "loss": 0.251, "step": 1600 }, { "epoch": 1.8320180383314544, "grad_norm": 0.4510025382041931, "learning_rate": 7.502222222222223e-06, "loss": 0.2285, "step": 1625 }, { "epoch": 1.8602029312288613, "grad_norm": 0.19896015524864197, "learning_rate": 7.446666666666668e-06, "loss": 0.2386, "step": 1650 }, { "epoch": 1.8883878241262684, "grad_norm": 0.12030182778835297, "learning_rate": 7.3911111111111125e-06, "loss": 0.2544, "step": 1675 }, { "epoch": 1.9165727170236753, "grad_norm": 0.13691359758377075, "learning_rate": 7.335555555555556e-06, "loss": 0.2426, "step": 1700 }, { "epoch": 1.9447576099210822, "grad_norm": 0.1743856519460678, "learning_rate": 7.280000000000001e-06, "loss": 0.2532, "step": 1725 }, { "epoch": 1.9729425028184893, "grad_norm": 0.19390033185482025, "learning_rate": 7.224444444444445e-06, "loss": 0.2397, "step": 1750 }, { "epoch": 2.0011273957158964, "grad_norm": 0.22048410773277283, "learning_rate": 7.1688888888888895e-06, "loss": 0.2398, "step": 1775 }, { "epoch": 2.0293122886133035, "grad_norm": 0.12077363580465317, "learning_rate": 7.113333333333334e-06, "loss": 0.2354, "step": 1800 }, { "epoch": 2.05749718151071, "grad_norm": 0.6927027106285095, "learning_rate": 7.057777777777778e-06, "loss": 0.241, "step": 1825 }, { "epoch": 2.0856820744081173, "grad_norm": 0.1784224808216095, "learning_rate": 7.0022222222222225e-06, "loss": 0.2477, "step": 1850 }, { "epoch": 2.1138669673055244, "grad_norm": 0.13008733093738556, "learning_rate": 6.946666666666667e-06, "loss": 0.2634, "step": 1875 }, { "epoch": 2.142051860202931, "grad_norm": 0.11152646690607071, "learning_rate": 6.891111111111111e-06, "loss": 0.2198, "step": 1900 }, { "epoch": 2.170236753100338, "grad_norm": 0.15575166046619415, "learning_rate": 6.835555555555556e-06, "loss": 0.2545, "step": 1925 }, { "epoch": 2.1984216459977453, "grad_norm": 0.13412870466709137, "learning_rate": 6.780000000000001e-06, "loss": 0.2353, "step": 1950 }, { "epoch": 2.2266065388951524, "grad_norm": 0.1718331277370453, "learning_rate": 6.724444444444444e-06, "loss": 0.2519, "step": 1975 }, { "epoch": 2.254791431792559, "grad_norm": 0.17061075568199158, "learning_rate": 6.668888888888889e-06, "loss": 0.2179, "step": 2000 }, { "epoch": 2.254791431792559, "eval_loss": 1.4350669384002686, "eval_runtime": 3183.8495, "eval_samples_per_second": 0.526, "eval_steps_per_second": 0.017, "eval_wer": 179.06901493089262, "step": 2000 }, { "epoch": 2.282976324689966, "grad_norm": 0.12167395651340485, "learning_rate": 6.613333333333334e-06, "loss": 0.2362, "step": 2025 }, { "epoch": 2.3111612175873733, "grad_norm": 0.14669859409332275, "learning_rate": 6.557777777777778e-06, "loss": 0.2337, "step": 2050 }, { "epoch": 2.3393461104847804, "grad_norm": 0.8504271507263184, "learning_rate": 6.502222222222223e-06, "loss": 0.2364, "step": 2075 }, { "epoch": 2.367531003382187, "grad_norm": 0.09018735587596893, "learning_rate": 6.446666666666668e-06, "loss": 0.2449, "step": 2100 }, { "epoch": 2.395715896279594, "grad_norm": 1.927909255027771, "learning_rate": 6.391111111111111e-06, "loss": 0.2374, "step": 2125 }, { "epoch": 2.4239007891770012, "grad_norm": 0.1180146113038063, "learning_rate": 6.335555555555556e-06, "loss": 0.2339, "step": 2150 }, { "epoch": 2.452085682074408, "grad_norm": 0.14500541985034943, "learning_rate": 6.280000000000001e-06, "loss": 0.2677, "step": 2175 }, { "epoch": 2.480270574971815, "grad_norm": 0.10550981760025024, "learning_rate": 6.224444444444445e-06, "loss": 0.2435, "step": 2200 }, { "epoch": 2.508455467869222, "grad_norm": 0.8889521360397339, "learning_rate": 6.16888888888889e-06, "loss": 0.2498, "step": 2225 }, { "epoch": 2.5366403607666292, "grad_norm": 2.0345895290374756, "learning_rate": 6.113333333333333e-06, "loss": 0.2339, "step": 2250 }, { "epoch": 2.5648252536640364, "grad_norm": 0.13331718742847443, "learning_rate": 6.057777777777778e-06, "loss": 0.241, "step": 2275 }, { "epoch": 2.593010146561443, "grad_norm": 1.2904160022735596, "learning_rate": 6.002222222222223e-06, "loss": 0.2518, "step": 2300 }, { "epoch": 2.62119503945885, "grad_norm": 0.11779336631298065, "learning_rate": 5.946666666666668e-06, "loss": 0.236, "step": 2325 }, { "epoch": 2.649379932356257, "grad_norm": 0.11726631969213486, "learning_rate": 5.891111111111112e-06, "loss": 0.2335, "step": 2350 }, { "epoch": 2.677564825253664, "grad_norm": 0.12784917652606964, "learning_rate": 5.8355555555555565e-06, "loss": 0.2315, "step": 2375 }, { "epoch": 2.705749718151071, "grad_norm": 0.1314825415611267, "learning_rate": 5.78e-06, "loss": 0.235, "step": 2400 }, { "epoch": 2.733934611048478, "grad_norm": 0.21480953693389893, "learning_rate": 5.724444444444445e-06, "loss": 0.2367, "step": 2425 }, { "epoch": 2.7621195039458852, "grad_norm": 0.11732250452041626, "learning_rate": 5.6688888888888895e-06, "loss": 0.2336, "step": 2450 }, { "epoch": 2.790304396843292, "grad_norm": 0.11237554252147675, "learning_rate": 5.613333333333334e-06, "loss": 0.2501, "step": 2475 }, { "epoch": 2.818489289740699, "grad_norm": 0.12970662117004395, "learning_rate": 5.557777777777778e-06, "loss": 0.2266, "step": 2500 }, { "epoch": 2.846674182638106, "grad_norm": 0.19031056761741638, "learning_rate": 5.5022222222222224e-06, "loss": 0.242, "step": 2525 }, { "epoch": 2.874859075535513, "grad_norm": 0.16265034675598145, "learning_rate": 5.4466666666666665e-06, "loss": 0.2278, "step": 2550 }, { "epoch": 2.90304396843292, "grad_norm": 0.27451595664024353, "learning_rate": 5.391111111111111e-06, "loss": 0.2493, "step": 2575 }, { "epoch": 2.931228861330327, "grad_norm": 0.17422199249267578, "learning_rate": 5.335555555555556e-06, "loss": 0.2407, "step": 2600 }, { "epoch": 2.959413754227734, "grad_norm": 0.11225639283657074, "learning_rate": 5.28e-06, "loss": 0.221, "step": 2625 }, { "epoch": 2.987598647125141, "grad_norm": 0.11298167705535889, "learning_rate": 5.224444444444445e-06, "loss": 0.2253, "step": 2650 }, { "epoch": 3.015783540022548, "grad_norm": 0.13156744837760925, "learning_rate": 5.168888888888889e-06, "loss": 0.2494, "step": 2675 }, { "epoch": 3.043968432919955, "grad_norm": 2.454240083694458, "learning_rate": 5.113333333333333e-06, "loss": 0.2615, "step": 2700 }, { "epoch": 3.0721533258173617, "grad_norm": 0.11773987859487534, "learning_rate": 5.057777777777778e-06, "loss": 0.2292, "step": 2725 }, { "epoch": 3.100338218714769, "grad_norm": 2.4229350090026855, "learning_rate": 5.002222222222223e-06, "loss": 0.2492, "step": 2750 }, { "epoch": 3.128523111612176, "grad_norm": 0.13231739401817322, "learning_rate": 4.946666666666667e-06, "loss": 0.2182, "step": 2775 }, { "epoch": 3.156708004509583, "grad_norm": 0.11936808377504349, "learning_rate": 4.891111111111111e-06, "loss": 0.2279, "step": 2800 }, { "epoch": 3.1848928974069897, "grad_norm": 3.4271562099456787, "learning_rate": 4.835555555555556e-06, "loss": 0.2313, "step": 2825 }, { "epoch": 3.2130777903043968, "grad_norm": 0.13328012824058533, "learning_rate": 4.78e-06, "loss": 0.2176, "step": 2850 }, { "epoch": 3.241262683201804, "grad_norm": 0.15237456560134888, "learning_rate": 4.724444444444445e-06, "loss": 0.2397, "step": 2875 }, { "epoch": 3.269447576099211, "grad_norm": 0.13094407320022583, "learning_rate": 4.66888888888889e-06, "loss": 0.2509, "step": 2900 }, { "epoch": 3.2976324689966177, "grad_norm": 0.14969752728939056, "learning_rate": 4.613333333333334e-06, "loss": 0.2281, "step": 2925 }, { "epoch": 3.3258173618940248, "grad_norm": 0.2272210568189621, "learning_rate": 4.557777777777778e-06, "loss": 0.2539, "step": 2950 }, { "epoch": 3.354002254791432, "grad_norm": 0.1374891996383667, "learning_rate": 4.502222222222223e-06, "loss": 0.2249, "step": 2975 }, { "epoch": 3.382187147688839, "grad_norm": 0.14481812715530396, "learning_rate": 4.446666666666667e-06, "loss": 0.2385, "step": 3000 }, { "epoch": 3.382187147688839, "eval_loss": 1.3986942768096924, "eval_runtime": 3116.7262, "eval_samples_per_second": 0.538, "eval_steps_per_second": 0.017, "eval_wer": 183.03517773771554, "step": 3000 }, { "epoch": 3.4103720405862457, "grad_norm": 0.14197325706481934, "learning_rate": 4.391111111111112e-06, "loss": 0.2263, "step": 3025 }, { "epoch": 3.4385569334836528, "grad_norm": 2.5543010234832764, "learning_rate": 4.3355555555555565e-06, "loss": 0.2168, "step": 3050 }, { "epoch": 3.46674182638106, "grad_norm": 0.3014455735683441, "learning_rate": 4.2800000000000005e-06, "loss": 0.2369, "step": 3075 }, { "epoch": 3.4949267192784665, "grad_norm": 0.11696666479110718, "learning_rate": 4.2244444444444446e-06, "loss": 0.229, "step": 3100 }, { "epoch": 3.5231116121758737, "grad_norm": 0.14176321029663086, "learning_rate": 4.168888888888889e-06, "loss": 0.2215, "step": 3125 }, { "epoch": 3.5512965050732808, "grad_norm": 2.6928396224975586, "learning_rate": 4.1133333333333335e-06, "loss": 0.2185, "step": 3150 }, { "epoch": 3.579481397970688, "grad_norm": 0.5729812383651733, "learning_rate": 4.057777777777778e-06, "loss": 0.2259, "step": 3175 }, { "epoch": 3.6076662908680945, "grad_norm": 0.33455201983451843, "learning_rate": 4.002222222222222e-06, "loss": 0.2186, "step": 3200 }, { "epoch": 3.6358511837655016, "grad_norm": 0.16064058244228363, "learning_rate": 3.946666666666667e-06, "loss": 0.2348, "step": 3225 }, { "epoch": 3.6640360766629088, "grad_norm": 0.13294631242752075, "learning_rate": 3.891111111111111e-06, "loss": 0.2484, "step": 3250 }, { "epoch": 3.6922209695603154, "grad_norm": 0.6433837413787842, "learning_rate": 3.835555555555555e-06, "loss": 0.2384, "step": 3275 }, { "epoch": 3.7204058624577225, "grad_norm": 0.14757393300533295, "learning_rate": 3.7800000000000002e-06, "loss": 0.231, "step": 3300 }, { "epoch": 3.7485907553551296, "grad_norm": 0.34130287170410156, "learning_rate": 3.724444444444445e-06, "loss": 0.2477, "step": 3325 }, { "epoch": 3.7767756482525368, "grad_norm": 0.12332061678171158, "learning_rate": 3.668888888888889e-06, "loss": 0.2437, "step": 3350 }, { "epoch": 3.804960541149944, "grad_norm": 0.1419714242219925, "learning_rate": 3.6133333333333336e-06, "loss": 0.2223, "step": 3375 }, { "epoch": 3.8331454340473505, "grad_norm": 0.14998725056648254, "learning_rate": 3.5577777777777785e-06, "loss": 0.2322, "step": 3400 }, { "epoch": 3.8613303269447576, "grad_norm": 0.14752830564975739, "learning_rate": 3.5022222222222225e-06, "loss": 0.2419, "step": 3425 }, { "epoch": 3.8895152198421648, "grad_norm": 0.19776736199855804, "learning_rate": 3.446666666666667e-06, "loss": 0.2214, "step": 3450 }, { "epoch": 3.9177001127395714, "grad_norm": 0.12292192131280899, "learning_rate": 3.391111111111111e-06, "loss": 0.2327, "step": 3475 }, { "epoch": 3.9458850056369785, "grad_norm": 0.12656843662261963, "learning_rate": 3.335555555555556e-06, "loss": 0.2244, "step": 3500 }, { "epoch": 3.9740698985343856, "grad_norm": 0.14774179458618164, "learning_rate": 3.2800000000000004e-06, "loss": 0.2163, "step": 3525 }, { "epoch": 4.002254791431793, "grad_norm": 0.1358705759048462, "learning_rate": 3.2244444444444444e-06, "loss": 0.2274, "step": 3550 }, { "epoch": 4.0304396843292, "grad_norm": 0.14576297998428345, "learning_rate": 3.1688888888888893e-06, "loss": 0.2239, "step": 3575 }, { "epoch": 4.058624577226607, "grad_norm": 0.13178740441799164, "learning_rate": 3.1133333333333337e-06, "loss": 0.22, "step": 3600 }, { "epoch": 4.086809470124013, "grad_norm": 0.7160453200340271, "learning_rate": 3.0577777777777778e-06, "loss": 0.2244, "step": 3625 }, { "epoch": 4.11499436302142, "grad_norm": 0.5130426287651062, "learning_rate": 3.0022222222222227e-06, "loss": 0.2194, "step": 3650 }, { "epoch": 4.143179255918827, "grad_norm": 0.14089062809944153, "learning_rate": 2.946666666666667e-06, "loss": 0.2381, "step": 3675 }, { "epoch": 4.1713641488162345, "grad_norm": 3.9715943336486816, "learning_rate": 2.891111111111111e-06, "loss": 0.2209, "step": 3700 }, { "epoch": 4.199549041713642, "grad_norm": 0.5900473594665527, "learning_rate": 2.835555555555556e-06, "loss": 0.2273, "step": 3725 }, { "epoch": 4.227733934611049, "grad_norm": 0.13557352125644684, "learning_rate": 2.7800000000000005e-06, "loss": 0.2112, "step": 3750 }, { "epoch": 4.255918827508456, "grad_norm": 0.17545416951179504, "learning_rate": 2.7244444444444445e-06, "loss": 0.2403, "step": 3775 }, { "epoch": 4.284103720405862, "grad_norm": 0.17527706921100616, "learning_rate": 2.6688888888888894e-06, "loss": 0.2182, "step": 3800 }, { "epoch": 4.312288613303269, "grad_norm": 0.12308468669652939, "learning_rate": 2.6133333333333334e-06, "loss": 0.2297, "step": 3825 }, { "epoch": 4.340473506200676, "grad_norm": 0.1240859255194664, "learning_rate": 2.557777777777778e-06, "loss": 0.2391, "step": 3850 }, { "epoch": 4.368658399098083, "grad_norm": 0.24992740154266357, "learning_rate": 2.5022222222222224e-06, "loss": 0.2386, "step": 3875 }, { "epoch": 4.3968432919954905, "grad_norm": 0.15345261991024017, "learning_rate": 2.446666666666667e-06, "loss": 0.2223, "step": 3900 }, { "epoch": 4.425028184892898, "grad_norm": 0.10449715703725815, "learning_rate": 2.3911111111111113e-06, "loss": 0.2794, "step": 3925 }, { "epoch": 4.453213077790305, "grad_norm": 0.12438195198774338, "learning_rate": 2.3355555555555557e-06, "loss": 0.2313, "step": 3950 }, { "epoch": 4.481397970687711, "grad_norm": 0.11366376280784607, "learning_rate": 2.28e-06, "loss": 0.2357, "step": 3975 }, { "epoch": 4.509582863585118, "grad_norm": 0.12389075011014938, "learning_rate": 2.2244444444444447e-06, "loss": 0.24, "step": 4000 }, { "epoch": 4.509582863585118, "eval_loss": 1.378143548965454, "eval_runtime": 3077.3495, "eval_samples_per_second": 0.545, "eval_steps_per_second": 0.017, "eval_wer": 176.00887532935792, "step": 4000 } ], "logging_steps": 25, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.854868401487872e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }