{ "best_global_step": 1000, "best_metric": 168.64974807007812, "best_model_checkpoint": "./whisper-small-finetuned-multilingual-on-kaggle-v2/checkpoint-1000", "epoch": 1.1273957158962795, "eval_steps": 1000, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02818489289740699, "grad_norm": 0.6385474801063538, "learning_rate": 4.800000000000001e-07, "loss": 0.436, "step": 25 }, { "epoch": 0.05636978579481398, "grad_norm": 0.364500492811203, "learning_rate": 9.800000000000001e-07, "loss": 0.4351, "step": 50 }, { "epoch": 0.08455467869222097, "grad_norm": 0.249020516872406, "learning_rate": 1.48e-06, "loss": 0.4622, "step": 75 }, { "epoch": 0.11273957158962795, "grad_norm": 0.6031928062438965, "learning_rate": 1.98e-06, "loss": 0.4681, "step": 100 }, { "epoch": 0.14092446448703494, "grad_norm": 0.3203608989715576, "learning_rate": 2.4800000000000004e-06, "loss": 0.4639, "step": 125 }, { "epoch": 0.16910935738444194, "grad_norm": 0.6545065641403198, "learning_rate": 2.9800000000000003e-06, "loss": 0.4419, "step": 150 }, { "epoch": 0.19729425028184894, "grad_norm": 0.2052975744009018, "learning_rate": 3.48e-06, "loss": 0.4438, "step": 175 }, { "epoch": 0.2254791431792559, "grad_norm": 0.6051822304725647, "learning_rate": 3.980000000000001e-06, "loss": 0.4034, "step": 200 }, { "epoch": 0.25366403607666294, "grad_norm": 0.6141038537025452, "learning_rate": 4.48e-06, "loss": 0.4104, "step": 225 }, { "epoch": 0.2818489289740699, "grad_norm": 0.534954845905304, "learning_rate": 4.980000000000001e-06, "loss": 0.4167, "step": 250 }, { "epoch": 0.3100338218714769, "grad_norm": 0.6292374730110168, "learning_rate": 5.480000000000001e-06, "loss": 0.387, "step": 275 }, { "epoch": 0.3382187147688839, "grad_norm": 0.5661519765853882, "learning_rate": 5.98e-06, "loss": 0.4088, "step": 300 }, { "epoch": 0.3664036076662909, "grad_norm": 0.3687807321548462, "learning_rate": 6.480000000000001e-06, "loss": 0.3419, "step": 325 }, { "epoch": 0.3945885005636979, "grad_norm": 0.5748394131660461, "learning_rate": 6.98e-06, "loss": 0.3881, "step": 350 }, { "epoch": 0.4227733934611049, "grad_norm": 0.40532639622688293, "learning_rate": 7.48e-06, "loss": 0.3589, "step": 375 }, { "epoch": 0.4509582863585118, "grad_norm": 0.2873406410217285, "learning_rate": 7.980000000000002e-06, "loss": 0.3263, "step": 400 }, { "epoch": 0.4791431792559188, "grad_norm": 0.3314521908760071, "learning_rate": 8.48e-06, "loss": 0.3614, "step": 425 }, { "epoch": 0.5073280721533259, "grad_norm": 0.1603882908821106, "learning_rate": 8.98e-06, "loss": 0.2867, "step": 450 }, { "epoch": 0.5355129650507328, "grad_norm": 0.2617914378643036, "learning_rate": 9.48e-06, "loss": 0.3226, "step": 475 }, { "epoch": 0.5636978579481398, "grad_norm": 0.16243544220924377, "learning_rate": 9.980000000000001e-06, "loss": 0.2966, "step": 500 }, { "epoch": 0.5918827508455468, "grad_norm": 0.21829640865325928, "learning_rate": 9.946666666666667e-06, "loss": 0.2815, "step": 525 }, { "epoch": 0.6200676437429538, "grad_norm": 0.30274373292922974, "learning_rate": 9.891111111111113e-06, "loss": 0.2956, "step": 550 }, { "epoch": 0.6482525366403608, "grad_norm": 0.5401255488395691, "learning_rate": 9.835555555555556e-06, "loss": 0.2772, "step": 575 }, { "epoch": 0.6764374295377678, "grad_norm": 0.596587061882019, "learning_rate": 9.780000000000001e-06, "loss": 0.2943, "step": 600 }, { "epoch": 0.7046223224351748, "grad_norm": 0.2570999562740326, "learning_rate": 
9.724444444444445e-06, "loss": 0.2749, "step": 625 }, { "epoch": 0.7328072153325818, "grad_norm": 0.18179918825626373, "learning_rate": 9.66888888888889e-06, "loss": 0.2767, "step": 650 }, { "epoch": 0.7609921082299888, "grad_norm": 0.13621068000793457, "learning_rate": 9.613333333333335e-06, "loss": 0.2697, "step": 675 }, { "epoch": 0.7891770011273957, "grad_norm": 0.2660331130027771, "learning_rate": 9.557777777777777e-06, "loss": 0.2742, "step": 700 }, { "epoch": 0.8173618940248027, "grad_norm": 0.11947935819625854, "learning_rate": 9.502222222222223e-06, "loss": 0.2753, "step": 725 }, { "epoch": 0.8455467869222097, "grad_norm": 0.2724321484565735, "learning_rate": 9.446666666666667e-06, "loss": 0.2494, "step": 750 }, { "epoch": 0.8737316798196166, "grad_norm": 0.11410418152809143, "learning_rate": 9.391111111111111e-06, "loss": 0.264, "step": 775 }, { "epoch": 0.9019165727170236, "grad_norm": 0.13550838828086853, "learning_rate": 9.335555555555557e-06, "loss": 0.2611, "step": 800 }, { "epoch": 0.9301014656144306, "grad_norm": 1.507051706314087, "learning_rate": 9.280000000000001e-06, "loss": 0.2748, "step": 825 }, { "epoch": 0.9582863585118376, "grad_norm": 0.11221399158239365, "learning_rate": 9.224444444444445e-06, "loss": 0.2972, "step": 850 }, { "epoch": 0.9864712514092446, "grad_norm": 0.705410361289978, "learning_rate": 9.168888888888889e-06, "loss": 0.2527, "step": 875 }, { "epoch": 1.0146561443066517, "grad_norm": 0.13268794119358063, "learning_rate": 9.113333333333335e-06, "loss": 0.2646, "step": 900 }, { "epoch": 1.0428410372040586, "grad_norm": 1.25346839427948, "learning_rate": 9.057777777777779e-06, "loss": 0.265, "step": 925 }, { "epoch": 1.0710259301014655, "grad_norm": 1.471956491470337, "learning_rate": 9.002222222222223e-06, "loss": 0.2856, "step": 950 }, { "epoch": 1.0992108229988726, "grad_norm": 0.13319148123264313, "learning_rate": 8.946666666666669e-06, "loss": 0.2552, "step": 975 }, { "epoch": 1.1273957158962795, "grad_norm": 0.7707216143608093, "learning_rate": 8.891111111111111e-06, "loss": 0.2825, "step": 1000 }, { "epoch": 1.1273957158962795, "eval_loss": 1.5161501169204712, "eval_runtime": 3096.2186, "eval_samples_per_second": 0.541, "eval_steps_per_second": 0.017, "eval_wer": 168.64974807007812, "step": 1000 } ], "logging_steps": 25, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.63717100371968e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }