{
  "best_global_step": 1000,
  "best_metric": 168.64974807007812,
  "best_model_checkpoint": "./whisper-small-finetuned-multilingual-on-kaggle-v2/checkpoint-1000",
  "epoch": 4.509582863585118,
  "eval_steps": 1000,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02818489289740699,
      "grad_norm": 0.6385474801063538,
      "learning_rate": 4.800000000000001e-07,
      "loss": 0.436,
      "step": 25
    },
    {
      "epoch": 0.05636978579481398,
      "grad_norm": 0.364500492811203,
      "learning_rate": 9.800000000000001e-07,
      "loss": 0.4351,
      "step": 50
    },
    {
      "epoch": 0.08455467869222097,
      "grad_norm": 0.249020516872406,
      "learning_rate": 1.48e-06,
      "loss": 0.4622,
      "step": 75
    },
    {
      "epoch": 0.11273957158962795,
      "grad_norm": 0.6031928062438965,
      "learning_rate": 1.98e-06,
      "loss": 0.4681,
      "step": 100
    },
    {
      "epoch": 0.14092446448703494,
      "grad_norm": 0.3203608989715576,
      "learning_rate": 2.4800000000000004e-06,
      "loss": 0.4639,
      "step": 125
    },
    {
      "epoch": 0.16910935738444194,
      "grad_norm": 0.6545065641403198,
      "learning_rate": 2.9800000000000003e-06,
      "loss": 0.4419,
      "step": 150
    },
    {
      "epoch": 0.19729425028184894,
      "grad_norm": 0.2052975744009018,
      "learning_rate": 3.48e-06,
      "loss": 0.4438,
      "step": 175
    },
    {
      "epoch": 0.2254791431792559,
      "grad_norm": 0.6051822304725647,
      "learning_rate": 3.980000000000001e-06,
      "loss": 0.4034,
      "step": 200
    },
    {
      "epoch": 0.25366403607666294,
      "grad_norm": 0.6141038537025452,
      "learning_rate": 4.48e-06,
      "loss": 0.4104,
      "step": 225
    },
    {
      "epoch": 0.2818489289740699,
      "grad_norm": 0.534954845905304,
      "learning_rate": 4.980000000000001e-06,
      "loss": 0.4167,
      "step": 250
    },
    {
      "epoch": 0.3100338218714769,
      "grad_norm": 0.6292374730110168,
      "learning_rate": 5.480000000000001e-06,
      "loss": 0.387,
      "step": 275
    },
    {
      "epoch": 0.3382187147688839,
      "grad_norm": 0.5661519765853882,
      "learning_rate": 5.98e-06,
      "loss": 0.4088,
      "step": 300
    },
    {
      "epoch": 0.3664036076662909,
      "grad_norm": 0.3687807321548462,
      "learning_rate": 6.480000000000001e-06,
      "loss": 0.3419,
      "step": 325
    },
    {
      "epoch": 0.3945885005636979,
      "grad_norm": 0.5748394131660461,
      "learning_rate": 6.98e-06,
      "loss": 0.3881,
      "step": 350
    },
    {
      "epoch": 0.4227733934611049,
      "grad_norm": 0.40532639622688293,
      "learning_rate": 7.48e-06,
      "loss": 0.3589,
      "step": 375
    },
    {
      "epoch": 0.4509582863585118,
      "grad_norm": 0.2873406410217285,
      "learning_rate": 7.980000000000002e-06,
      "loss": 0.3263,
      "step": 400
    },
    {
      "epoch": 0.4791431792559188,
      "grad_norm": 0.3314521908760071,
      "learning_rate": 8.48e-06,
      "loss": 0.3614,
      "step": 425
    },
    {
      "epoch": 0.5073280721533259,
      "grad_norm": 0.1603882908821106,
      "learning_rate": 8.98e-06,
      "loss": 0.2867,
      "step": 450
    },
    {
      "epoch": 0.5355129650507328,
      "grad_norm": 0.2617914378643036,
      "learning_rate": 9.48e-06,
      "loss": 0.3226,
      "step": 475
    },
    {
      "epoch": 0.5636978579481398,
      "grad_norm": 0.16243544220924377,
      "learning_rate": 9.980000000000001e-06,
      "loss": 0.2966,
      "step": 500
    },
    {
      "epoch": 0.5918827508455468,
      "grad_norm": 0.21829640865325928,
      "learning_rate": 9.946666666666667e-06,
      "loss": 0.2815,
      "step": 525
    },
    {
      "epoch": 0.6200676437429538,
      "grad_norm": 0.30274373292922974,
      "learning_rate": 9.891111111111113e-06,
      "loss": 0.2956,
      "step": 550
    },
    {
      "epoch": 0.6482525366403608,
      "grad_norm": 0.5401255488395691,
      "learning_rate": 9.835555555555556e-06,
      "loss": 0.2772,
      "step": 575
    },
    {
      "epoch": 0.6764374295377678,
      "grad_norm": 0.596587061882019,
      "learning_rate": 9.780000000000001e-06,
      "loss": 0.2943,
      "step": 600
    },
    {
      "epoch": 0.7046223224351748,
      "grad_norm": 0.2570999562740326,
      "learning_rate": 9.724444444444445e-06,
      "loss": 0.2749,
      "step": 625
    },
    {
      "epoch": 0.7328072153325818,
      "grad_norm": 0.18179918825626373,
      "learning_rate": 9.66888888888889e-06,
      "loss": 0.2767,
      "step": 650
    },
    {
      "epoch": 0.7609921082299888,
      "grad_norm": 0.13621068000793457,
      "learning_rate": 9.613333333333335e-06,
      "loss": 0.2697,
      "step": 675
    },
    {
      "epoch": 0.7891770011273957,
      "grad_norm": 0.2660331130027771,
      "learning_rate": 9.557777777777777e-06,
      "loss": 0.2742,
      "step": 700
    },
    {
      "epoch": 0.8173618940248027,
      "grad_norm": 0.11947935819625854,
      "learning_rate": 9.502222222222223e-06,
      "loss": 0.2753,
      "step": 725
    },
    {
      "epoch": 0.8455467869222097,
      "grad_norm": 0.2724321484565735,
      "learning_rate": 9.446666666666667e-06,
      "loss": 0.2494,
      "step": 750
    },
    {
      "epoch": 0.8737316798196166,
      "grad_norm": 0.11410418152809143,
      "learning_rate": 9.391111111111111e-06,
      "loss": 0.264,
      "step": 775
    },
    {
      "epoch": 0.9019165727170236,
      "grad_norm": 0.13550838828086853,
      "learning_rate": 9.335555555555557e-06,
      "loss": 0.2611,
      "step": 800
    },
    {
      "epoch": 0.9301014656144306,
      "grad_norm": 1.507051706314087,
      "learning_rate": 9.280000000000001e-06,
      "loss": 0.2748,
      "step": 825
    },
    {
      "epoch": 0.9582863585118376,
      "grad_norm": 0.11221399158239365,
      "learning_rate": 9.224444444444445e-06,
      "loss": 0.2972,
      "step": 850
    },
    {
      "epoch": 0.9864712514092446,
      "grad_norm": 0.705410361289978,
      "learning_rate": 9.168888888888889e-06,
      "loss": 0.2527,
      "step": 875
    },
    {
      "epoch": 1.0146561443066517,
      "grad_norm": 0.13268794119358063,
      "learning_rate": 9.113333333333335e-06,
      "loss": 0.2646,
      "step": 900
    },
    {
      "epoch": 1.0428410372040586,
      "grad_norm": 1.25346839427948,
      "learning_rate": 9.057777777777779e-06,
      "loss": 0.265,
      "step": 925
    },
    {
      "epoch": 1.0710259301014655,
      "grad_norm": 1.471956491470337,
      "learning_rate": 9.002222222222223e-06,
      "loss": 0.2856,
      "step": 950
    },
    {
      "epoch": 1.0992108229988726,
      "grad_norm": 0.13319148123264313,
      "learning_rate": 8.946666666666669e-06,
      "loss": 0.2552,
      "step": 975
    },
    {
      "epoch": 1.1273957158962795,
      "grad_norm": 0.7707216143608093,
      "learning_rate": 8.891111111111111e-06,
      "loss": 0.2825,
      "step": 1000
    },
    {
      "epoch": 1.1273957158962795,
      "eval_loss": 1.5161501169204712,
      "eval_runtime": 3096.2186,
      "eval_samples_per_second": 0.541,
      "eval_steps_per_second": 0.017,
      "eval_wer": 168.64974807007812,
      "step": 1000
    },
    {
      "epoch": 1.1555806087936866,
      "grad_norm": 1.633041262626648,
      "learning_rate": 8.835555555555557e-06,
      "loss": 0.2616,
      "step": 1025
    },
    {
      "epoch": 1.1837655016910935,
      "grad_norm": 0.19760851562023163,
      "learning_rate": 8.78e-06,
      "loss": 0.2414,
      "step": 1050
    },
    {
      "epoch": 1.2119503945885006,
      "grad_norm": 0.6687982678413391,
      "learning_rate": 8.724444444444445e-06,
      "loss": 0.2686,
      "step": 1075
    },
    {
      "epoch": 1.2401352874859075,
      "grad_norm": 0.09788186848163605,
      "learning_rate": 8.66888888888889e-06,
      "loss": 0.2436,
      "step": 1100
    },
    {
      "epoch": 1.2683201803833146,
      "grad_norm": 0.3376760482788086,
      "learning_rate": 8.613333333333333e-06,
      "loss": 0.2455,
      "step": 1125
    },
    {
      "epoch": 1.2965050732807215,
      "grad_norm": 0.16203612089157104,
      "learning_rate": 8.557777777777778e-06,
      "loss": 0.2656,
      "step": 1150
    },
    {
      "epoch": 1.3246899661781284,
      "grad_norm": 0.12653979659080505,
      "learning_rate": 8.502222222222223e-06,
      "loss": 0.2428,
      "step": 1175
    },
    {
      "epoch": 1.3528748590755355,
      "grad_norm": 0.12609504163265228,
      "learning_rate": 8.446666666666668e-06,
      "loss": 0.2584,
      "step": 1200
    },
    {
      "epoch": 1.3810597519729426,
      "grad_norm": 0.6176909804344177,
      "learning_rate": 8.391111111111112e-06,
      "loss": 0.2551,
      "step": 1225
    },
    {
      "epoch": 1.4092446448703495,
      "grad_norm": 0.14362627267837524,
      "learning_rate": 8.335555555555556e-06,
      "loss": 0.2623,
      "step": 1250
    },
    {
      "epoch": 1.4374295377677564,
      "grad_norm": 0.11503814905881882,
      "learning_rate": 8.28e-06,
      "loss": 0.2581,
      "step": 1275
    },
    {
      "epoch": 1.4656144306651635,
      "grad_norm": 0.13389258086681366,
      "learning_rate": 8.224444444444444e-06,
      "loss": 0.2507,
      "step": 1300
    },
    {
      "epoch": 1.4937993235625704,
      "grad_norm": 1.0773035287857056,
      "learning_rate": 8.16888888888889e-06,
      "loss": 0.2409,
      "step": 1325
    },
    {
      "epoch": 1.5219842164599775,
      "grad_norm": 0.6458689570426941,
      "learning_rate": 8.113333333333334e-06,
      "loss": 0.24,
      "step": 1350
    },
    {
      "epoch": 1.5501691093573844,
      "grad_norm": 0.11144981533288956,
      "learning_rate": 8.057777777777778e-06,
      "loss": 0.2624,
      "step": 1375
    },
    {
      "epoch": 1.5783540022547915,
      "grad_norm": 0.963024914264679,
      "learning_rate": 8.002222222222222e-06,
      "loss": 0.2381,
      "step": 1400
    },
    {
      "epoch": 1.6065388951521984,
      "grad_norm": 2.91090989112854,
      "learning_rate": 7.946666666666666e-06,
      "loss": 0.2548,
      "step": 1425
    },
    {
      "epoch": 1.6347237880496053,
      "grad_norm": 0.5699247717857361,
      "learning_rate": 7.891111111111112e-06,
      "loss": 0.273,
      "step": 1450
    },
    {
      "epoch": 1.6629086809470124,
      "grad_norm": 0.11966383457183838,
      "learning_rate": 7.835555555555556e-06,
      "loss": 0.2538,
      "step": 1475
    },
    {
      "epoch": 1.6910935738444195,
      "grad_norm": 0.49657556414604187,
      "learning_rate": 7.78e-06,
      "loss": 0.2446,
      "step": 1500
    },
    {
      "epoch": 1.7192784667418264,
      "grad_norm": 0.21423515677452087,
      "learning_rate": 7.724444444444446e-06,
      "loss": 0.2365,
      "step": 1525
    },
    {
      "epoch": 1.7474633596392333,
      "grad_norm": 0.3403068482875824,
      "learning_rate": 7.66888888888889e-06,
      "loss": 0.2382,
      "step": 1550
    },
    {
      "epoch": 1.7756482525366404,
      "grad_norm": 0.12040483951568604,
      "learning_rate": 7.613333333333334e-06,
      "loss": 0.2433,
      "step": 1575
    },
    {
      "epoch": 1.8038331454340475,
      "grad_norm": 0.1376182585954666,
      "learning_rate": 7.557777777777779e-06,
      "loss": 0.251,
      "step": 1600
    },
    {
      "epoch": 1.8320180383314544,
      "grad_norm": 0.4510025382041931,
      "learning_rate": 7.502222222222223e-06,
      "loss": 0.2285,
      "step": 1625
    },
    {
      "epoch": 1.8602029312288613,
      "grad_norm": 0.19896015524864197,
      "learning_rate": 7.446666666666668e-06,
      "loss": 0.2386,
      "step": 1650
    },
    {
      "epoch": 1.8883878241262684,
      "grad_norm": 0.12030182778835297,
      "learning_rate": 7.3911111111111125e-06,
      "loss": 0.2544,
      "step": 1675
    },
    {
      "epoch": 1.9165727170236753,
      "grad_norm": 0.13691359758377075,
      "learning_rate": 7.335555555555556e-06,
      "loss": 0.2426,
      "step": 1700
    },
    {
      "epoch": 1.9447576099210822,
      "grad_norm": 0.1743856519460678,
      "learning_rate": 7.280000000000001e-06,
      "loss": 0.2532,
      "step": 1725
    },
    {
      "epoch": 1.9729425028184893,
      "grad_norm": 0.19390033185482025,
      "learning_rate": 7.224444444444445e-06,
      "loss": 0.2397,
      "step": 1750
    },
    {
      "epoch": 2.0011273957158964,
      "grad_norm": 0.22048410773277283,
      "learning_rate": 7.1688888888888895e-06,
      "loss": 0.2398,
      "step": 1775
    },
    {
      "epoch": 2.0293122886133035,
      "grad_norm": 0.12077363580465317,
      "learning_rate": 7.113333333333334e-06,
      "loss": 0.2354,
      "step": 1800
    },
    {
      "epoch": 2.05749718151071,
      "grad_norm": 0.6927027106285095,
      "learning_rate": 7.057777777777778e-06,
      "loss": 0.241,
      "step": 1825
    },
    {
      "epoch": 2.0856820744081173,
      "grad_norm": 0.1784224808216095,
      "learning_rate": 7.0022222222222225e-06,
      "loss": 0.2477,
      "step": 1850
    },
    {
      "epoch": 2.1138669673055244,
      "grad_norm": 0.13008733093738556,
      "learning_rate": 6.946666666666667e-06,
      "loss": 0.2634,
      "step": 1875
    },
    {
      "epoch": 2.142051860202931,
      "grad_norm": 0.11152646690607071,
      "learning_rate": 6.891111111111111e-06,
      "loss": 0.2198,
      "step": 1900
    },
    {
      "epoch": 2.170236753100338,
      "grad_norm": 0.15575166046619415,
      "learning_rate": 6.835555555555556e-06,
      "loss": 0.2545,
      "step": 1925
    },
    {
      "epoch": 2.1984216459977453,
      "grad_norm": 0.13412870466709137,
      "learning_rate": 6.780000000000001e-06,
      "loss": 0.2353,
      "step": 1950
    },
    {
      "epoch": 2.2266065388951524,
      "grad_norm": 0.1718331277370453,
      "learning_rate": 6.724444444444444e-06,
      "loss": 0.2519,
      "step": 1975
    },
    {
      "epoch": 2.254791431792559,
      "grad_norm": 0.17061075568199158,
      "learning_rate": 6.668888888888889e-06,
      "loss": 0.2179,
      "step": 2000
    },
    {
      "epoch": 2.254791431792559,
      "eval_loss": 1.4350669384002686,
      "eval_runtime": 3183.8495,
      "eval_samples_per_second": 0.526,
      "eval_steps_per_second": 0.017,
      "eval_wer": 179.06901493089262,
      "step": 2000
    },
    {
      "epoch": 2.282976324689966,
      "grad_norm": 0.12167395651340485,
      "learning_rate": 6.613333333333334e-06,
      "loss": 0.2362,
      "step": 2025
    },
    {
      "epoch": 2.3111612175873733,
      "grad_norm": 0.14669859409332275,
      "learning_rate": 6.557777777777778e-06,
      "loss": 0.2337,
      "step": 2050
    },
    {
      "epoch": 2.3393461104847804,
      "grad_norm": 0.8504271507263184,
      "learning_rate": 6.502222222222223e-06,
      "loss": 0.2364,
      "step": 2075
    },
    {
      "epoch": 2.367531003382187,
      "grad_norm": 0.09018735587596893,
      "learning_rate": 6.446666666666668e-06,
      "loss": 0.2449,
      "step": 2100
    },
    {
      "epoch": 2.395715896279594,
      "grad_norm": 1.927909255027771,
      "learning_rate": 6.391111111111111e-06,
      "loss": 0.2374,
      "step": 2125
    },
    {
      "epoch": 2.4239007891770012,
      "grad_norm": 0.1180146113038063,
      "learning_rate": 6.335555555555556e-06,
      "loss": 0.2339,
      "step": 2150
    },
    {
      "epoch": 2.452085682074408,
      "grad_norm": 0.14500541985034943,
      "learning_rate": 6.280000000000001e-06,
      "loss": 0.2677,
      "step": 2175
    },
    {
      "epoch": 2.480270574971815,
      "grad_norm": 0.10550981760025024,
      "learning_rate": 6.224444444444445e-06,
      "loss": 0.2435,
      "step": 2200
    },
    {
      "epoch": 2.508455467869222,
      "grad_norm": 0.8889521360397339,
      "learning_rate": 6.16888888888889e-06,
      "loss": 0.2498,
      "step": 2225
    },
    {
      "epoch": 2.5366403607666292,
      "grad_norm": 2.0345895290374756,
      "learning_rate": 6.113333333333333e-06,
      "loss": 0.2339,
      "step": 2250
    },
    {
      "epoch": 2.5648252536640364,
      "grad_norm": 0.13331718742847443,
      "learning_rate": 6.057777777777778e-06,
      "loss": 0.241,
      "step": 2275
    },
    {
      "epoch": 2.593010146561443,
      "grad_norm": 1.2904160022735596,
      "learning_rate": 6.002222222222223e-06,
      "loss": 0.2518,
      "step": 2300
    },
    {
      "epoch": 2.62119503945885,
      "grad_norm": 0.11779336631298065,
      "learning_rate": 5.946666666666668e-06,
      "loss": 0.236,
      "step": 2325
    },
    {
      "epoch": 2.649379932356257,
      "grad_norm": 0.11726631969213486,
      "learning_rate": 5.891111111111112e-06,
      "loss": 0.2335,
      "step": 2350
    },
    {
      "epoch": 2.677564825253664,
      "grad_norm": 0.12784917652606964,
      "learning_rate": 5.8355555555555565e-06,
      "loss": 0.2315,
      "step": 2375
    },
    {
      "epoch": 2.705749718151071,
      "grad_norm": 0.1314825415611267,
      "learning_rate": 5.78e-06,
      "loss": 0.235,
      "step": 2400
    },
    {
      "epoch": 2.733934611048478,
      "grad_norm": 0.21480953693389893,
      "learning_rate": 5.724444444444445e-06,
      "loss": 0.2367,
      "step": 2425
    },
    {
      "epoch": 2.7621195039458852,
      "grad_norm": 0.11732250452041626,
      "learning_rate": 5.6688888888888895e-06,
      "loss": 0.2336,
      "step": 2450
    },
    {
      "epoch": 2.790304396843292,
      "grad_norm": 0.11237554252147675,
      "learning_rate": 5.613333333333334e-06,
      "loss": 0.2501,
      "step": 2475
    },
    {
      "epoch": 2.818489289740699,
      "grad_norm": 0.12970662117004395,
      "learning_rate": 5.557777777777778e-06,
      "loss": 0.2266,
      "step": 2500
    },
    {
      "epoch": 2.846674182638106,
      "grad_norm": 0.19031056761741638,
      "learning_rate": 5.5022222222222224e-06,
      "loss": 0.242,
      "step": 2525
    },
    {
      "epoch": 2.874859075535513,
      "grad_norm": 0.16265034675598145,
      "learning_rate": 5.4466666666666665e-06,
      "loss": 0.2278,
      "step": 2550
    },
    {
      "epoch": 2.90304396843292,
      "grad_norm": 0.27451595664024353,
      "learning_rate": 5.391111111111111e-06,
      "loss": 0.2493,
      "step": 2575
    },
    {
      "epoch": 2.931228861330327,
      "grad_norm": 0.17422199249267578,
      "learning_rate": 5.335555555555556e-06,
      "loss": 0.2407,
      "step": 2600
    },
    {
      "epoch": 2.959413754227734,
      "grad_norm": 0.11225639283657074,
      "learning_rate": 5.28e-06,
      "loss": 0.221,
      "step": 2625
    },
    {
      "epoch": 2.987598647125141,
      "grad_norm": 0.11298167705535889,
      "learning_rate": 5.224444444444445e-06,
      "loss": 0.2253,
      "step": 2650
    },
    {
      "epoch": 3.015783540022548,
      "grad_norm": 0.13156744837760925,
      "learning_rate": 5.168888888888889e-06,
      "loss": 0.2494,
      "step": 2675
    },
    {
      "epoch": 3.043968432919955,
      "grad_norm": 2.454240083694458,
      "learning_rate": 5.113333333333333e-06,
      "loss": 0.2615,
      "step": 2700
    },
    {
      "epoch": 3.0721533258173617,
      "grad_norm": 0.11773987859487534,
      "learning_rate": 5.057777777777778e-06,
      "loss": 0.2292,
      "step": 2725
    },
    {
      "epoch": 3.100338218714769,
      "grad_norm": 2.4229350090026855,
      "learning_rate": 5.002222222222223e-06,
      "loss": 0.2492,
      "step": 2750
    },
    {
      "epoch": 3.128523111612176,
      "grad_norm": 0.13231739401817322,
      "learning_rate": 4.946666666666667e-06,
      "loss": 0.2182,
      "step": 2775
    },
    {
      "epoch": 3.156708004509583,
      "grad_norm": 0.11936808377504349,
      "learning_rate": 4.891111111111111e-06,
      "loss": 0.2279,
      "step": 2800
    },
    {
      "epoch": 3.1848928974069897,
      "grad_norm": 3.4271562099456787,
      "learning_rate": 4.835555555555556e-06,
      "loss": 0.2313,
      "step": 2825
    },
    {
      "epoch": 3.2130777903043968,
      "grad_norm": 0.13328012824058533,
      "learning_rate": 4.78e-06,
      "loss": 0.2176,
      "step": 2850
    },
    {
      "epoch": 3.241262683201804,
      "grad_norm": 0.15237456560134888,
      "learning_rate": 4.724444444444445e-06,
      "loss": 0.2397,
      "step": 2875
    },
    {
      "epoch": 3.269447576099211,
      "grad_norm": 0.13094407320022583,
      "learning_rate": 4.66888888888889e-06,
      "loss": 0.2509,
      "step": 2900
    },
    {
      "epoch": 3.2976324689966177,
      "grad_norm": 0.14969752728939056,
      "learning_rate": 4.613333333333334e-06,
      "loss": 0.2281,
      "step": 2925
    },
    {
      "epoch": 3.3258173618940248,
      "grad_norm": 0.2272210568189621,
      "learning_rate": 4.557777777777778e-06,
      "loss": 0.2539,
      "step": 2950
    },
    {
      "epoch": 3.354002254791432,
      "grad_norm": 0.1374891996383667,
      "learning_rate": 4.502222222222223e-06,
      "loss": 0.2249,
      "step": 2975
    },
    {
      "epoch": 3.382187147688839,
      "grad_norm": 0.14481812715530396,
      "learning_rate": 4.446666666666667e-06,
      "loss": 0.2385,
      "step": 3000
    },
    {
      "epoch": 3.382187147688839,
      "eval_loss": 1.3986942768096924,
      "eval_runtime": 3116.7262,
      "eval_samples_per_second": 0.538,
      "eval_steps_per_second": 0.017,
      "eval_wer": 183.03517773771554,
      "step": 3000
    },
    {
      "epoch": 3.4103720405862457,
      "grad_norm": 0.14197325706481934,
      "learning_rate": 4.391111111111112e-06,
      "loss": 0.2263,
      "step": 3025
    },
    {
      "epoch": 3.4385569334836528,
      "grad_norm": 2.5543010234832764,
      "learning_rate": 4.3355555555555565e-06,
      "loss": 0.2168,
      "step": 3050
    },
    {
      "epoch": 3.46674182638106,
      "grad_norm": 0.3014455735683441,
      "learning_rate": 4.2800000000000005e-06,
      "loss": 0.2369,
      "step": 3075
    },
    {
      "epoch": 3.4949267192784665,
      "grad_norm": 0.11696666479110718,
      "learning_rate": 4.2244444444444446e-06,
      "loss": 0.229,
      "step": 3100
    },
    {
      "epoch": 3.5231116121758737,
      "grad_norm": 0.14176321029663086,
      "learning_rate": 4.168888888888889e-06,
      "loss": 0.2215,
      "step": 3125
    },
    {
      "epoch": 3.5512965050732808,
      "grad_norm": 2.6928396224975586,
      "learning_rate": 4.1133333333333335e-06,
      "loss": 0.2185,
      "step": 3150
    },
    {
      "epoch": 3.579481397970688,
      "grad_norm": 0.5729812383651733,
      "learning_rate": 4.057777777777778e-06,
      "loss": 0.2259,
      "step": 3175
    },
    {
      "epoch": 3.6076662908680945,
      "grad_norm": 0.33455201983451843,
      "learning_rate": 4.002222222222222e-06,
      "loss": 0.2186,
      "step": 3200
    },
    {
      "epoch": 3.6358511837655016,
      "grad_norm": 0.16064058244228363,
      "learning_rate": 3.946666666666667e-06,
      "loss": 0.2348,
      "step": 3225
    },
    {
      "epoch": 3.6640360766629088,
      "grad_norm": 0.13294631242752075,
      "learning_rate": 3.891111111111111e-06,
      "loss": 0.2484,
      "step": 3250
    },
    {
      "epoch": 3.6922209695603154,
      "grad_norm": 0.6433837413787842,
      "learning_rate": 3.835555555555555e-06,
      "loss": 0.2384,
      "step": 3275
    },
    {
      "epoch": 3.7204058624577225,
      "grad_norm": 0.14757393300533295,
      "learning_rate": 3.7800000000000002e-06,
      "loss": 0.231,
      "step": 3300
    },
    {
      "epoch": 3.7485907553551296,
      "grad_norm": 0.34130287170410156,
      "learning_rate": 3.724444444444445e-06,
      "loss": 0.2477,
      "step": 3325
    },
    {
      "epoch": 3.7767756482525368,
      "grad_norm": 0.12332061678171158,
      "learning_rate": 3.668888888888889e-06,
      "loss": 0.2437,
      "step": 3350
    },
    {
      "epoch": 3.804960541149944,
      "grad_norm": 0.1419714242219925,
      "learning_rate": 3.6133333333333336e-06,
      "loss": 0.2223,
      "step": 3375
    },
    {
      "epoch": 3.8331454340473505,
      "grad_norm": 0.14998725056648254,
      "learning_rate": 3.5577777777777785e-06,
      "loss": 0.2322,
      "step": 3400
    },
    {
      "epoch": 3.8613303269447576,
      "grad_norm": 0.14752830564975739,
      "learning_rate": 3.5022222222222225e-06,
      "loss": 0.2419,
      "step": 3425
    },
    {
      "epoch": 3.8895152198421648,
      "grad_norm": 0.19776736199855804,
      "learning_rate": 3.446666666666667e-06,
      "loss": 0.2214,
      "step": 3450
    },
    {
      "epoch": 3.9177001127395714,
      "grad_norm": 0.12292192131280899,
      "learning_rate": 3.391111111111111e-06,
      "loss": 0.2327,
      "step": 3475
    },
    {
      "epoch": 3.9458850056369785,
      "grad_norm": 0.12656843662261963,
      "learning_rate": 3.335555555555556e-06,
      "loss": 0.2244,
      "step": 3500
    },
    {
      "epoch": 3.9740698985343856,
      "grad_norm": 0.14774179458618164,
      "learning_rate": 3.2800000000000004e-06,
      "loss": 0.2163,
      "step": 3525
    },
    {
      "epoch": 4.002254791431793,
      "grad_norm": 0.1358705759048462,
      "learning_rate": 3.2244444444444444e-06,
      "loss": 0.2274,
      "step": 3550
    },
    {
      "epoch": 4.0304396843292,
      "grad_norm": 0.14576297998428345,
      "learning_rate": 3.1688888888888893e-06,
      "loss": 0.2239,
      "step": 3575
    },
    {
      "epoch": 4.058624577226607,
      "grad_norm": 0.13178740441799164,
      "learning_rate": 3.1133333333333337e-06,
      "loss": 0.22,
      "step": 3600
    },
    {
      "epoch": 4.086809470124013,
      "grad_norm": 0.7160453200340271,
      "learning_rate": 3.0577777777777778e-06,
      "loss": 0.2244,
      "step": 3625
    },
    {
      "epoch": 4.11499436302142,
      "grad_norm": 0.5130426287651062,
      "learning_rate": 3.0022222222222227e-06,
      "loss": 0.2194,
      "step": 3650
    },
    {
      "epoch": 4.143179255918827,
      "grad_norm": 0.14089062809944153,
      "learning_rate": 2.946666666666667e-06,
      "loss": 0.2381,
      "step": 3675
    },
    {
      "epoch": 4.1713641488162345,
      "grad_norm": 3.9715943336486816,
      "learning_rate": 2.891111111111111e-06,
      "loss": 0.2209,
      "step": 3700
    },
    {
      "epoch": 4.199549041713642,
      "grad_norm": 0.5900473594665527,
      "learning_rate": 2.835555555555556e-06,
      "loss": 0.2273,
      "step": 3725
    },
    {
      "epoch": 4.227733934611049,
      "grad_norm": 0.13557352125644684,
      "learning_rate": 2.7800000000000005e-06,
      "loss": 0.2112,
      "step": 3750
    },
    {
      "epoch": 4.255918827508456,
      "grad_norm": 0.17545416951179504,
      "learning_rate": 2.7244444444444445e-06,
      "loss": 0.2403,
      "step": 3775
    },
    {
      "epoch": 4.284103720405862,
      "grad_norm": 0.17527706921100616,
      "learning_rate": 2.6688888888888894e-06,
      "loss": 0.2182,
      "step": 3800
    },
    {
      "epoch": 4.312288613303269,
      "grad_norm": 0.12308468669652939,
      "learning_rate": 2.6133333333333334e-06,
      "loss": 0.2297,
      "step": 3825
    },
    {
      "epoch": 4.340473506200676,
      "grad_norm": 0.1240859255194664,
      "learning_rate": 2.557777777777778e-06,
      "loss": 0.2391,
      "step": 3850
    },
    {
      "epoch": 4.368658399098083,
      "grad_norm": 0.24992740154266357,
      "learning_rate": 2.5022222222222224e-06,
      "loss": 0.2386,
      "step": 3875
    },
    {
      "epoch": 4.3968432919954905,
      "grad_norm": 0.15345261991024017,
      "learning_rate": 2.446666666666667e-06,
      "loss": 0.2223,
      "step": 3900
    },
    {
      "epoch": 4.425028184892898,
      "grad_norm": 0.10449715703725815,
      "learning_rate": 2.3911111111111113e-06,
      "loss": 0.2794,
      "step": 3925
    },
    {
      "epoch": 4.453213077790305,
      "grad_norm": 0.12438195198774338,
      "learning_rate": 2.3355555555555557e-06,
      "loss": 0.2313,
      "step": 3950
    },
    {
      "epoch": 4.481397970687711,
      "grad_norm": 0.11366376280784607,
      "learning_rate": 2.28e-06,
      "loss": 0.2357,
      "step": 3975
    },
    {
      "epoch": 4.509582863585118,
      "grad_norm": 0.12389075011014938,
      "learning_rate": 2.2244444444444447e-06,
      "loss": 0.24,
      "step": 4000
    },
    {
      "epoch": 4.509582863585118,
      "eval_loss": 1.378143548965454,
      "eval_runtime": 3077.3495,
      "eval_samples_per_second": 0.545,
      "eval_steps_per_second": 0.017,
      "eval_wer": 176.00887532935792,
      "step": 4000
    }
  ],
  "logging_steps": 25,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.854868401487872e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}