{ "best_metric": 0.8111165764879489, "best_model_checkpoint": "de-DE/wav2vec2-large-xlsr-53-german/checkpoint-6720", "epoch": 30.0, "eval_steps": 100, "global_step": 7200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.20833333333333334, "grad_norm": 1.024327278137207, "learning_rate": 6.666666666666667e-06, "loss": 4.092, "step": 50 }, { "epoch": 0.4166666666666667, "grad_norm": 1.314271092414856, "learning_rate": 1.3611111111111111e-05, "loss": 4.0311, "step": 100 }, { "epoch": 0.625, "grad_norm": 2.0937185287475586, "learning_rate": 2.0416666666666667e-05, "loss": 3.9311, "step": 150 }, { "epoch": 0.8333333333333334, "grad_norm": 1.646790862083435, "learning_rate": 2.7222222222222223e-05, "loss": 3.8451, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.11362518445646827, "eval_f1": 0.007653610237362924, "eval_loss": 3.7495229244232178, "eval_runtime": 24.1482, "eval_samples_per_second": 84.188, "eval_steps_per_second": 1.781, "step": 240 }, { "epoch": 1.0416666666666667, "grad_norm": 1.8846683502197266, "learning_rate": 3.402777777777778e-05, "loss": 3.8069, "step": 250 }, { "epoch": 1.25, "grad_norm": 2.0058088302612305, "learning_rate": 4.0972222222222225e-05, "loss": 3.7056, "step": 300 }, { "epoch": 1.4583333333333333, "grad_norm": 3.574331045150757, "learning_rate": 4.791666666666667e-05, "loss": 3.5534, "step": 350 }, { "epoch": 1.6666666666666665, "grad_norm": 2.8944480419158936, "learning_rate": 5.486111111111112e-05, "loss": 3.3897, "step": 400 }, { "epoch": 1.875, "grad_norm": 6.055631637573242, "learning_rate": 6.180555555555556e-05, "loss": 3.1747, "step": 450 }, { "epoch": 2.0, "eval_accuracy": 0.2562715199212986, "eval_f1": 0.06666940653741649, "eval_loss": 2.7525689601898193, "eval_runtime": 23.5601, "eval_samples_per_second": 86.29, "eval_steps_per_second": 1.825, "step": 480 }, { "epoch": 2.0833333333333335, "grad_norm": 8.197975158691406, "learning_rate": 6.875e-05, "loss": 2.8232, "step": 500 }, { "epoch": 2.2916666666666665, "grad_norm": 2.209603786468506, "learning_rate": 7.569444444444445e-05, "loss": 2.6687, "step": 550 }, { "epoch": 2.5, "grad_norm": 4.554799556732178, "learning_rate": 8.263888888888889e-05, "loss": 2.4675, "step": 600 }, { "epoch": 2.7083333333333335, "grad_norm": 3.187694549560547, "learning_rate": 8.958333333333335e-05, "loss": 2.2689, "step": 650 }, { "epoch": 2.9166666666666665, "grad_norm": 5.407806396484375, "learning_rate": 9.652777777777779e-05, "loss": 2.1364, "step": 700 }, { "epoch": 3.0, "eval_accuracy": 0.5115592720118052, "eval_f1": 0.2658663959575897, "eval_loss": 1.8959603309631348, "eval_runtime": 23.6685, "eval_samples_per_second": 85.895, "eval_steps_per_second": 1.817, "step": 720 }, { "epoch": 3.125, "grad_norm": 2.8895232677459717, "learning_rate": 9.961419753086421e-05, "loss": 1.8461, "step": 750 }, { "epoch": 3.3333333333333335, "grad_norm": 7.406684398651123, "learning_rate": 9.88425925925926e-05, "loss": 1.6923, "step": 800 }, { "epoch": 3.5416666666666665, "grad_norm": 6.666975021362305, "learning_rate": 9.807098765432099e-05, "loss": 1.5639, "step": 850 }, { "epoch": 3.75, "grad_norm": 5.389110565185547, "learning_rate": 9.72993827160494e-05, "loss": 1.5243, "step": 900 }, { "epoch": 3.9583333333333335, "grad_norm": 4.376372337341309, "learning_rate": 9.652777777777779e-05, "loss": 1.4465, "step": 950 }, { "epoch": 4.0, "eval_accuracy": 0.6694540088539105, "eval_f1": 0.4768045727957133, "eval_loss": 1.3300424814224243, "eval_runtime": 23.7044, "eval_samples_per_second": 85.765, "eval_steps_per_second": 1.814, "step": 960 }, { "epoch": 4.166666666666667, "grad_norm": 3.5495402812957764, "learning_rate": 9.575617283950617e-05, "loss": 1.2025, "step": 1000 }, { "epoch": 4.375, "grad_norm": 4.167064189910889, "learning_rate": 9.498456790123458e-05, "loss": 1.1315, "step": 1050 }, { "epoch": 4.583333333333333, "grad_norm": 3.363506317138672, "learning_rate": 9.421296296296298e-05, "loss": 1.105, "step": 1100 }, { "epoch": 4.791666666666667, "grad_norm": 3.985663652420044, "learning_rate": 9.344135802469136e-05, "loss": 1.0597, "step": 1150 }, { "epoch": 5.0, "grad_norm": 6.066577434539795, "learning_rate": 9.266975308641975e-05, "loss": 1.0589, "step": 1200 }, { "epoch": 5.0, "eval_accuracy": 0.705853418593212, "eval_f1": 0.5606166555495936, "eval_loss": 1.2347984313964844, "eval_runtime": 23.5911, "eval_samples_per_second": 86.177, "eval_steps_per_second": 1.823, "step": 1200 }, { "epoch": 5.208333333333333, "grad_norm": 5.127081394195557, "learning_rate": 9.189814814814816e-05, "loss": 0.842, "step": 1250 }, { "epoch": 5.416666666666667, "grad_norm": 5.842033863067627, "learning_rate": 9.112654320987654e-05, "loss": 0.7644, "step": 1300 }, { "epoch": 5.625, "grad_norm": 9.250378608703613, "learning_rate": 9.035493827160494e-05, "loss": 0.7717, "step": 1350 }, { "epoch": 5.833333333333333, "grad_norm": 4.046441555023193, "learning_rate": 8.958333333333335e-05, "loss": 0.7479, "step": 1400 }, { "epoch": 6.0, "eval_accuracy": 0.7422528283325135, "eval_f1": 0.6232036631647458, "eval_loss": 1.0977600812911987, "eval_runtime": 23.6566, "eval_samples_per_second": 85.938, "eval_steps_per_second": 1.818, "step": 1440 }, { "epoch": 6.041666666666667, "grad_norm": 7.836002349853516, "learning_rate": 8.881172839506173e-05, "loss": 0.8124, "step": 1450 }, { "epoch": 6.25, "grad_norm": 5.824333190917969, "learning_rate": 8.804012345679012e-05, "loss": 0.5837, "step": 1500 }, { "epoch": 6.458333333333333, "grad_norm": 4.0973076820373535, "learning_rate": 8.726851851851853e-05, "loss": 0.658, "step": 1550 }, { "epoch": 6.666666666666667, "grad_norm": 6.109771251678467, "learning_rate": 8.649691358024691e-05, "loss": 0.6045, "step": 1600 }, { "epoch": 6.875, "grad_norm": 5.03196382522583, "learning_rate": 8.572530864197531e-05, "loss": 0.5712, "step": 1650 }, { "epoch": 7.0, "eval_accuracy": 0.749631087063453, "eval_f1": 0.6438025736511106, "eval_loss": 1.1345957517623901, "eval_runtime": 23.8573, "eval_samples_per_second": 85.215, "eval_steps_per_second": 1.802, "step": 1680 }, { "epoch": 7.083333333333333, "grad_norm": 2.731377124786377, "learning_rate": 8.495370370370372e-05, "loss": 0.5243, "step": 1700 }, { "epoch": 7.291666666666667, "grad_norm": 4.128145217895508, "learning_rate": 8.41820987654321e-05, "loss": 0.4794, "step": 1750 }, { "epoch": 7.5, "grad_norm": 5.915714263916016, "learning_rate": 8.34104938271605e-05, "loss": 0.4442, "step": 1800 }, { "epoch": 7.708333333333333, "grad_norm": 5.760502815246582, "learning_rate": 8.263888888888889e-05, "loss": 0.4258, "step": 1850 }, { "epoch": 7.916666666666667, "grad_norm": 8.529943466186523, "learning_rate": 8.186728395061729e-05, "loss": 0.4991, "step": 1900 }, { "epoch": 8.0, "eval_accuracy": 0.7693064436792917, "eval_f1": 0.6664067895457293, "eval_loss": 1.1221246719360352, "eval_runtime": 23.8599, "eval_samples_per_second": 85.206, "eval_steps_per_second": 1.802, "step": 1920 }, { "epoch": 8.125, "grad_norm": 3.264131784439087, "learning_rate": 8.109567901234568e-05, "loss": 0.4654, "step": 1950 }, { "epoch": 8.333333333333334, "grad_norm": 4.219313621520996, "learning_rate": 8.032407407407408e-05, "loss": 0.3621, "step": 2000 }, { "epoch": 8.541666666666666, "grad_norm": 11.9534912109375, "learning_rate": 7.955246913580247e-05, "loss": 0.3955, "step": 2050 }, { "epoch": 8.75, "grad_norm": 4.408581256866455, "learning_rate": 7.878086419753087e-05, "loss": 0.427, "step": 2100 }, { "epoch": 8.958333333333334, "grad_norm": 6.143781661987305, "learning_rate": 7.800925925925926e-05, "loss": 0.3804, "step": 2150 }, { "epoch": 9.0, "eval_accuracy": 0.7766847024102311, "eval_f1": 0.7047859015908629, "eval_loss": 1.1663849353790283, "eval_runtime": 24.2525, "eval_samples_per_second": 83.826, "eval_steps_per_second": 1.773, "step": 2160 }, { "epoch": 9.166666666666666, "grad_norm": 12.192872047424316, "learning_rate": 7.723765432098766e-05, "loss": 0.3038, "step": 2200 }, { "epoch": 9.375, "grad_norm": 1.7822233438491821, "learning_rate": 7.646604938271605e-05, "loss": 0.3035, "step": 2250 }, { "epoch": 9.583333333333334, "grad_norm": 2.351457118988037, "learning_rate": 7.569444444444445e-05, "loss": 0.3418, "step": 2300 }, { "epoch": 9.791666666666666, "grad_norm": 5.011100769042969, "learning_rate": 7.492283950617284e-05, "loss": 0.3455, "step": 2350 }, { "epoch": 10.0, "grad_norm": 2.2908878326416016, "learning_rate": 7.415123456790124e-05, "loss": 0.3304, "step": 2400 }, { "epoch": 10.0, "eval_accuracy": 0.7816035415641909, "eval_f1": 0.709637466095638, "eval_loss": 1.200905203819275, "eval_runtime": 24.2124, "eval_samples_per_second": 83.965, "eval_steps_per_second": 1.776, "step": 2400 }, { "epoch": 10.208333333333334, "grad_norm": 3.9933135509490967, "learning_rate": 7.337962962962963e-05, "loss": 0.2682, "step": 2450 }, { "epoch": 10.416666666666666, "grad_norm": 3.206864356994629, "learning_rate": 7.260802469135803e-05, "loss": 0.2185, "step": 2500 }, { "epoch": 10.625, "grad_norm": 4.931612491607666, "learning_rate": 7.183641975308642e-05, "loss": 0.2828, "step": 2550 }, { "epoch": 10.833333333333334, "grad_norm": 5.217026710510254, "learning_rate": 7.106481481481482e-05, "loss": 0.2456, "step": 2600 }, { "epoch": 11.0, "eval_accuracy": 0.7840629611411707, "eval_f1": 0.7288780431612951, "eval_loss": 1.2644785642623901, "eval_runtime": 23.6706, "eval_samples_per_second": 85.887, "eval_steps_per_second": 1.817, "step": 2640 }, { "epoch": 11.041666666666666, "grad_norm": 3.869821786880493, "learning_rate": 7.029320987654321e-05, "loss": 0.2363, "step": 2650 }, { "epoch": 11.25, "grad_norm": 2.36630916595459, "learning_rate": 6.952160493827161e-05, "loss": 0.2885, "step": 2700 }, { "epoch": 11.458333333333334, "grad_norm": 6.892541408538818, "learning_rate": 6.875e-05, "loss": 0.1969, "step": 2750 }, { "epoch": 11.666666666666666, "grad_norm": 3.639878034591675, "learning_rate": 6.79783950617284e-05, "loss": 0.2234, "step": 2800 }, { "epoch": 11.875, "grad_norm": 8.246334075927734, "learning_rate": 6.72067901234568e-05, "loss": 0.2011, "step": 2850 }, { "epoch": 12.0, "eval_accuracy": 0.7850467289719626, "eval_f1": 0.7377910935476946, "eval_loss": 1.2643272876739502, "eval_runtime": 23.6042, "eval_samples_per_second": 86.129, "eval_steps_per_second": 1.822, "step": 2880 }, { "epoch": 12.083333333333334, "grad_norm": 2.1910037994384766, "learning_rate": 6.643518518518519e-05, "loss": 0.282, "step": 2900 }, { "epoch": 12.291666666666666, "grad_norm": 3.8286426067352295, "learning_rate": 6.566358024691358e-05, "loss": 0.2043, "step": 2950 }, { "epoch": 12.5, "grad_norm": 4.921551704406738, "learning_rate": 6.489197530864198e-05, "loss": 0.2155, "step": 3000 }, { "epoch": 12.708333333333334, "grad_norm": 1.8685420751571655, "learning_rate": 6.412037037037038e-05, "loss": 0.2111, "step": 3050 }, { "epoch": 12.916666666666666, "grad_norm": 6.422890663146973, "learning_rate": 6.334876543209877e-05, "loss": 0.1777, "step": 3100 }, { "epoch": 13.0, "eval_accuracy": 0.7830791933103788, "eval_f1": 0.7234597353562061, "eval_loss": 1.374625325202942, "eval_runtime": 23.6021, "eval_samples_per_second": 86.137, "eval_steps_per_second": 1.822, "step": 3120 }, { "epoch": 13.125, "grad_norm": 8.687601089477539, "learning_rate": 6.257716049382715e-05, "loss": 0.1555, "step": 3150 }, { "epoch": 13.333333333333334, "grad_norm": 11.324960708618164, "learning_rate": 6.180555555555556e-05, "loss": 0.1499, "step": 3200 }, { "epoch": 13.541666666666666, "grad_norm": 3.336853504180908, "learning_rate": 6.103395061728395e-05, "loss": 0.1864, "step": 3250 }, { "epoch": 13.75, "grad_norm": 1.6025912761688232, "learning_rate": 6.0262345679012345e-05, "loss": 0.1843, "step": 3300 }, { "epoch": 13.958333333333334, "grad_norm": 4.139062881469727, "learning_rate": 5.949074074074075e-05, "loss": 0.2021, "step": 3350 }, { "epoch": 14.0, "eval_accuracy": 0.7884899163797344, "eval_f1": 0.7453922882000399, "eval_loss": 1.4031716585159302, "eval_runtime": 23.982, "eval_samples_per_second": 84.772, "eval_steps_per_second": 1.793, "step": 3360 }, { "epoch": 14.166666666666666, "grad_norm": 1.704457402229309, "learning_rate": 5.8719135802469135e-05, "loss": 0.2016, "step": 3400 }, { "epoch": 14.375, "grad_norm": 5.048659801483154, "learning_rate": 5.794753086419753e-05, "loss": 0.1902, "step": 3450 }, { "epoch": 14.583333333333334, "grad_norm": 1.1883018016815186, "learning_rate": 5.717592592592593e-05, "loss": 0.1661, "step": 3500 }, { "epoch": 14.791666666666666, "grad_norm": 1.4773151874542236, "learning_rate": 5.640432098765433e-05, "loss": 0.1704, "step": 3550 }, { "epoch": 15.0, "grad_norm": 3.672395944595337, "learning_rate": 5.5632716049382716e-05, "loss": 0.1301, "step": 3600 }, { "epoch": 15.0, "eval_accuracy": 0.7968519429414658, "eval_f1": 0.7390307058335169, "eval_loss": 1.3756645917892456, "eval_runtime": 23.7146, "eval_samples_per_second": 85.728, "eval_steps_per_second": 1.813, "step": 3600 }, { "epoch": 15.208333333333334, "grad_norm": 8.429780960083008, "learning_rate": 5.486111111111112e-05, "loss": 0.1045, "step": 3650 }, { "epoch": 15.416666666666666, "grad_norm": 3.3841114044189453, "learning_rate": 5.408950617283951e-05, "loss": 0.1204, "step": 3700 }, { "epoch": 15.625, "grad_norm": 0.8274503350257874, "learning_rate": 5.33179012345679e-05, "loss": 0.1491, "step": 3750 }, { "epoch": 15.833333333333334, "grad_norm": 2.2125766277313232, "learning_rate": 5.25462962962963e-05, "loss": 0.1288, "step": 3800 }, { "epoch": 16.0, "eval_accuracy": 0.7968519429414658, "eval_f1": 0.747298612060153, "eval_loss": 1.3802248239517212, "eval_runtime": 23.7435, "eval_samples_per_second": 85.623, "eval_steps_per_second": 1.811, "step": 3840 }, { "epoch": 16.041666666666668, "grad_norm": 1.3442326784133911, "learning_rate": 5.17746913580247e-05, "loss": 0.1558, "step": 3850 }, { "epoch": 16.25, "grad_norm": 1.203651785850525, "learning_rate": 5.100308641975309e-05, "loss": 0.1082, "step": 3900 }, { "epoch": 16.458333333333332, "grad_norm": 1.2546812295913696, "learning_rate": 5.023148148148148e-05, "loss": 0.0965, "step": 3950 }, { "epoch": 16.666666666666668, "grad_norm": 0.7136884331703186, "learning_rate": 4.945987654320988e-05, "loss": 0.1154, "step": 4000 }, { "epoch": 16.875, "grad_norm": 0.4750060439109802, "learning_rate": 4.868827160493827e-05, "loss": 0.1112, "step": 4050 }, { "epoch": 17.0, "eval_accuracy": 0.8007870142646335, "eval_f1": 0.7553450768467522, "eval_loss": 1.428536057472229, "eval_runtime": 23.7879, "eval_samples_per_second": 85.464, "eval_steps_per_second": 1.808, "step": 4080 }, { "epoch": 17.083333333333332, "grad_norm": 1.5086203813552856, "learning_rate": 4.791666666666667e-05, "loss": 0.1215, "step": 4100 }, { "epoch": 17.291666666666668, "grad_norm": 2.4366488456726074, "learning_rate": 4.714506172839506e-05, "loss": 0.0719, "step": 4150 }, { "epoch": 17.5, "grad_norm": 3.2962234020233154, "learning_rate": 4.637345679012346e-05, "loss": 0.1204, "step": 4200 }, { "epoch": 17.708333333333332, "grad_norm": 3.0297040939331055, "learning_rate": 4.5601851851851854e-05, "loss": 0.0865, "step": 4250 }, { "epoch": 17.916666666666668, "grad_norm": 0.18285313248634338, "learning_rate": 4.483024691358025e-05, "loss": 0.0948, "step": 4300 }, { "epoch": 18.0, "eval_accuracy": 0.7988194786030497, "eval_f1": 0.7510609054238961, "eval_loss": 1.5198878049850464, "eval_runtime": 23.685, "eval_samples_per_second": 85.835, "eval_steps_per_second": 1.815, "step": 4320 }, { "epoch": 18.125, "grad_norm": 0.5847868919372559, "learning_rate": 4.4058641975308644e-05, "loss": 0.0986, "step": 4350 }, { "epoch": 18.333333333333332, "grad_norm": 2.1199817657470703, "learning_rate": 4.328703703703704e-05, "loss": 0.0872, "step": 4400 }, { "epoch": 18.541666666666668, "grad_norm": 0.6489153504371643, "learning_rate": 4.2515432098765435e-05, "loss": 0.0808, "step": 4450 }, { "epoch": 18.75, "grad_norm": 2.3209807872772217, "learning_rate": 4.174382716049383e-05, "loss": 0.1215, "step": 4500 }, { "epoch": 18.958333333333332, "grad_norm": 2.4139556884765625, "learning_rate": 4.0972222222222225e-05, "loss": 0.0957, "step": 4550 }, { "epoch": 19.0, "eval_accuracy": 0.7958681751106739, "eval_f1": 0.7576130155967707, "eval_loss": 1.473281741142273, "eval_runtime": 23.8084, "eval_samples_per_second": 85.39, "eval_steps_per_second": 1.806, "step": 4560 }, { "epoch": 19.166666666666668, "grad_norm": 3.5237743854522705, "learning_rate": 4.020061728395062e-05, "loss": 0.0744, "step": 4600 }, { "epoch": 19.375, "grad_norm": 1.4964354038238525, "learning_rate": 3.942901234567901e-05, "loss": 0.0934, "step": 4650 }, { "epoch": 19.583333333333332, "grad_norm": 4.37375545501709, "learning_rate": 3.865740740740741e-05, "loss": 0.0772, "step": 4700 }, { "epoch": 19.791666666666668, "grad_norm": 10.330802917480469, "learning_rate": 3.7885802469135806e-05, "loss": 0.0734, "step": 4750 }, { "epoch": 20.0, "grad_norm": 0.23749621212482452, "learning_rate": 3.7114197530864194e-05, "loss": 0.098, "step": 4800 }, { "epoch": 20.0, "eval_accuracy": 0.7998032464338416, "eval_f1": 0.7624627184134384, "eval_loss": 1.556807518005371, "eval_runtime": 23.2783, "eval_samples_per_second": 87.334, "eval_steps_per_second": 1.847, "step": 4800 }, { "epoch": 20.208333333333332, "grad_norm": 0.3187510669231415, "learning_rate": 3.6342592592592596e-05, "loss": 0.0633, "step": 4850 }, { "epoch": 20.416666666666668, "grad_norm": 0.13432516157627106, "learning_rate": 3.5586419753086416e-05, "loss": 0.0614, "step": 4900 }, { "epoch": 20.625, "grad_norm": 0.25370869040489197, "learning_rate": 3.481481481481482e-05, "loss": 0.0861, "step": 4950 }, { "epoch": 20.833333333333332, "grad_norm": 3.7762410640716553, "learning_rate": 3.4043209876543214e-05, "loss": 0.0749, "step": 5000 }, { "epoch": 21.0, "eval_accuracy": 0.8037383177570093, "eval_f1": 0.7641798211209964, "eval_loss": 1.5395395755767822, "eval_runtime": 23.181, "eval_samples_per_second": 87.701, "eval_steps_per_second": 1.855, "step": 5040 }, { "epoch": 21.041666666666668, "grad_norm": 0.18449953198432922, "learning_rate": 3.327160493827161e-05, "loss": 0.0555, "step": 5050 }, { "epoch": 21.25, "grad_norm": 0.1283693015575409, "learning_rate": 3.2500000000000004e-05, "loss": 0.0846, "step": 5100 }, { "epoch": 21.458333333333332, "grad_norm": 0.160166934132576, "learning_rate": 3.172839506172839e-05, "loss": 0.0812, "step": 5150 }, { "epoch": 21.666666666666668, "grad_norm": 3.508594274520874, "learning_rate": 3.0956790123456794e-05, "loss": 0.0591, "step": 5200 }, { "epoch": 21.875, "grad_norm": 2.580338954925537, "learning_rate": 3.018518518518519e-05, "loss": 0.0517, "step": 5250 }, { "epoch": 22.0, "eval_accuracy": 0.8027545499262174, "eval_f1": 0.7624346197209738, "eval_loss": 1.5220506191253662, "eval_runtime": 23.2074, "eval_samples_per_second": 87.601, "eval_steps_per_second": 1.853, "step": 5280 }, { "epoch": 22.083333333333332, "grad_norm": 0.5038365721702576, "learning_rate": 2.941358024691358e-05, "loss": 0.1003, "step": 5300 }, { "epoch": 22.291666666666668, "grad_norm": 3.1954283714294434, "learning_rate": 2.8641975308641977e-05, "loss": 0.0765, "step": 5350 }, { "epoch": 22.5, "grad_norm": 4.271166801452637, "learning_rate": 2.7870370370370375e-05, "loss": 0.0795, "step": 5400 }, { "epoch": 22.708333333333332, "grad_norm": 0.5485684871673584, "learning_rate": 2.7098765432098767e-05, "loss": 0.0499, "step": 5450 }, { "epoch": 22.916666666666668, "grad_norm": 10.670831680297852, "learning_rate": 2.6327160493827162e-05, "loss": 0.0558, "step": 5500 }, { "epoch": 23.0, "eval_accuracy": 0.8037383177570093, "eval_f1": 0.7688860228768916, "eval_loss": 1.5517994165420532, "eval_runtime": 23.1917, "eval_samples_per_second": 87.661, "eval_steps_per_second": 1.854, "step": 5520 }, { "epoch": 23.125, "grad_norm": 0.2618740499019623, "learning_rate": 2.5555555555555554e-05, "loss": 0.0602, "step": 5550 }, { "epoch": 23.333333333333332, "grad_norm": 3.9775030612945557, "learning_rate": 2.4783950617283953e-05, "loss": 0.0584, "step": 5600 }, { "epoch": 23.541666666666668, "grad_norm": 8.924899101257324, "learning_rate": 2.4012345679012348e-05, "loss": 0.0373, "step": 5650 }, { "epoch": 23.75, "grad_norm": 0.7083622217178345, "learning_rate": 2.324074074074074e-05, "loss": 0.0655, "step": 5700 }, { "epoch": 23.958333333333332, "grad_norm": 0.6933650374412537, "learning_rate": 2.246913580246914e-05, "loss": 0.0783, "step": 5750 }, { "epoch": 24.0, "eval_accuracy": 0.8037383177570093, "eval_f1": 0.7676599116368084, "eval_loss": 1.5943090915679932, "eval_runtime": 23.205, "eval_samples_per_second": 87.611, "eval_steps_per_second": 1.853, "step": 5760 }, { "epoch": 24.166666666666668, "grad_norm": 0.4165418744087219, "learning_rate": 2.1697530864197534e-05, "loss": 0.0629, "step": 5800 }, { "epoch": 24.375, "grad_norm": 5.558866500854492, "learning_rate": 2.0925925925925925e-05, "loss": 0.0584, "step": 5850 }, { "epoch": 24.583333333333332, "grad_norm": 0.15994061529636383, "learning_rate": 2.015432098765432e-05, "loss": 0.0378, "step": 5900 }, { "epoch": 24.791666666666668, "grad_norm": 0.09766929596662521, "learning_rate": 1.938271604938272e-05, "loss": 0.0628, "step": 5950 }, { "epoch": 25.0, "grad_norm": 0.6397963166236877, "learning_rate": 1.861111111111111e-05, "loss": 0.0566, "step": 6000 }, { "epoch": 25.0, "eval_accuracy": 0.807673389080177, "eval_f1": 0.7516748081782088, "eval_loss": 1.5415334701538086, "eval_runtime": 23.1793, "eval_samples_per_second": 87.707, "eval_steps_per_second": 1.855, "step": 6000 }, { "epoch": 25.208333333333332, "grad_norm": 0.6714476346969604, "learning_rate": 1.7839506172839506e-05, "loss": 0.0588, "step": 6050 }, { "epoch": 25.416666666666668, "grad_norm": 2.5402417182922363, "learning_rate": 1.70679012345679e-05, "loss": 0.0441, "step": 6100 }, { "epoch": 25.625, "grad_norm": 0.8831026554107666, "learning_rate": 1.62962962962963e-05, "loss": 0.0464, "step": 6150 }, { "epoch": 25.833333333333332, "grad_norm": 5.741055965423584, "learning_rate": 1.5524691358024692e-05, "loss": 0.0975, "step": 6200 }, { "epoch": 26.0, "eval_accuracy": 0.8071815051647812, "eval_f1": 0.7753333468711867, "eval_loss": 1.5777385234832764, "eval_runtime": 23.2054, "eval_samples_per_second": 87.609, "eval_steps_per_second": 1.853, "step": 6240 }, { "epoch": 26.041666666666668, "grad_norm": 0.06368303298950195, "learning_rate": 1.4753086419753087e-05, "loss": 0.0377, "step": 6250 }, { "epoch": 26.25, "grad_norm": 0.14520739018917084, "learning_rate": 1.3981481481481482e-05, "loss": 0.056, "step": 6300 }, { "epoch": 26.458333333333332, "grad_norm": 5.526387691497803, "learning_rate": 1.3209876543209876e-05, "loss": 0.0506, "step": 6350 }, { "epoch": 26.666666666666668, "grad_norm": 2.3162128925323486, "learning_rate": 1.2438271604938271e-05, "loss": 0.0536, "step": 6400 }, { "epoch": 26.875, "grad_norm": 4.010312557220459, "learning_rate": 1.1666666666666668e-05, "loss": 0.0415, "step": 6450 }, { "epoch": 27.0, "eval_accuracy": 0.8106246925725529, "eval_f1": 0.7798820497054886, "eval_loss": 1.5293785333633423, "eval_runtime": 23.2829, "eval_samples_per_second": 87.317, "eval_steps_per_second": 1.847, "step": 6480 }, { "epoch": 27.083333333333332, "grad_norm": 0.5132197737693787, "learning_rate": 1.0895061728395061e-05, "loss": 0.045, "step": 6500 }, { "epoch": 27.291666666666668, "grad_norm": 0.4705391228199005, "learning_rate": 1.0123456790123458e-05, "loss": 0.0891, "step": 6550 }, { "epoch": 27.5, "grad_norm": 0.08739415556192398, "learning_rate": 9.351851851851852e-06, "loss": 0.0598, "step": 6600 }, { "epoch": 27.708333333333332, "grad_norm": 0.12814438343048096, "learning_rate": 8.580246913580247e-06, "loss": 0.0505, "step": 6650 }, { "epoch": 27.916666666666668, "grad_norm": 0.6424435973167419, "learning_rate": 7.808641975308642e-06, "loss": 0.0457, "step": 6700 }, { "epoch": 28.0, "eval_accuracy": 0.8111165764879489, "eval_f1": 0.7744206411564545, "eval_loss": 1.5268691778182983, "eval_runtime": 23.5095, "eval_samples_per_second": 86.476, "eval_steps_per_second": 1.829, "step": 6720 }, { "epoch": 28.125, "grad_norm": 0.05361216142773628, "learning_rate": 7.0370370370370375e-06, "loss": 0.0451, "step": 6750 }, { "epoch": 28.333333333333332, "grad_norm": 0.1594465970993042, "learning_rate": 6.265432098765432e-06, "loss": 0.0496, "step": 6800 }, { "epoch": 28.541666666666668, "grad_norm": 0.35148361325263977, "learning_rate": 5.493827160493827e-06, "loss": 0.0452, "step": 6850 }, { "epoch": 28.75, "grad_norm": 0.44529807567596436, "learning_rate": 4.722222222222222e-06, "loss": 0.0481, "step": 6900 }, { "epoch": 28.958333333333332, "grad_norm": 1.6227864027023315, "learning_rate": 3.9506172839506175e-06, "loss": 0.0536, "step": 6950 }, { "epoch": 29.0, "eval_accuracy": 0.8106246925725529, "eval_f1": 0.7768268032848169, "eval_loss": 1.5296322107315063, "eval_runtime": 23.3655, "eval_samples_per_second": 87.009, "eval_steps_per_second": 1.84, "step": 6960 }, { "epoch": 29.166666666666668, "grad_norm": 2.7951414585113525, "learning_rate": 3.1790123456790123e-06, "loss": 0.0347, "step": 7000 }, { "epoch": 29.375, "grad_norm": 2.8032307624816895, "learning_rate": 2.4074074074074075e-06, "loss": 0.0454, "step": 7050 }, { "epoch": 29.583333333333332, "grad_norm": 0.09597442299127579, "learning_rate": 1.6358024691358025e-06, "loss": 0.0499, "step": 7100 }, { "epoch": 29.791666666666668, "grad_norm": 8.167551040649414, "learning_rate": 8.641975308641975e-07, "loss": 0.0529, "step": 7150 }, { "epoch": 30.0, "grad_norm": 0.09000340104103088, "learning_rate": 9.25925925925926e-08, "loss": 0.0471, "step": 7200 }, { "epoch": 30.0, "eval_accuracy": 0.8101328086571569, "eval_f1": 0.7664919887901929, "eval_loss": 1.5375313758850098, "eval_runtime": 23.148, "eval_samples_per_second": 87.826, "eval_steps_per_second": 1.858, "step": 7200 } ], "logging_steps": 50, "max_steps": 7200, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0469261495692793e+20, "train_batch_size": 48, "trial_name": null, "trial_params": null }