xls-r-128-italic-massive / trainer_state.json
alkiskoudounas's picture
Upload folder using huggingface_hub
833b463 verified
raw
history blame
16.6 kB
{
"best_metric": 0.7855386128873586,
"best_model_checkpoint": "wav2vec2-xls-r-300m-ic-finetuning/checkpoint-2790",
"epoch": 35.0,
"global_step": 3150,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.56,
"learning_rate": 1.5555555555555555e-05,
"loss": 4.0884,
"step": 50
},
{
"epoch": 1.0,
"eval_accuracy": 0.06443679291687161,
"eval_loss": 3.94179630279541,
"eval_runtime": 17.2679,
"eval_samples_per_second": 117.733,
"eval_steps_per_second": 0.927,
"step": 90
},
{
"epoch": 1.11,
"learning_rate": 3.142857142857143e-05,
"loss": 4.0116,
"step": 100
},
{
"epoch": 1.67,
"learning_rate": 4.73015873015873e-05,
"loss": 3.8285,
"step": 150
},
{
"epoch": 2.0,
"eval_accuracy": 0.06443679291687161,
"eval_loss": 3.742227792739868,
"eval_runtime": 17.4046,
"eval_samples_per_second": 116.808,
"eval_steps_per_second": 0.919,
"step": 180
},
{
"epoch": 2.22,
"learning_rate": 6.317460317460318e-05,
"loss": 3.7787,
"step": 200
},
{
"epoch": 2.78,
"learning_rate": 7.904761904761905e-05,
"loss": 3.7603,
"step": 250
},
{
"epoch": 3.0,
"eval_accuracy": 0.06443679291687161,
"eval_loss": 3.730922222137451,
"eval_runtime": 17.2612,
"eval_samples_per_second": 117.779,
"eval_steps_per_second": 0.927,
"step": 270
},
{
"epoch": 3.33,
"learning_rate": 9.46031746031746e-05,
"loss": 3.7493,
"step": 300
},
{
"epoch": 3.89,
"learning_rate": 9.883597883597884e-05,
"loss": 3.7475,
"step": 350
},
{
"epoch": 4.0,
"eval_accuracy": 0.08460403344810624,
"eval_loss": 3.719400405883789,
"eval_runtime": 17.1221,
"eval_samples_per_second": 118.736,
"eval_steps_per_second": 0.934,
"step": 360
},
{
"epoch": 4.44,
"learning_rate": 9.707231040564374e-05,
"loss": 3.7259,
"step": 400
},
{
"epoch": 5.0,
"learning_rate": 9.530864197530865e-05,
"loss": 3.6843,
"step": 450
},
{
"epoch": 5.0,
"eval_accuracy": 0.102803738317757,
"eval_loss": 3.6270625591278076,
"eval_runtime": 16.8572,
"eval_samples_per_second": 120.601,
"eval_steps_per_second": 0.949,
"step": 450
},
{
"epoch": 5.56,
"learning_rate": 9.354497354497354e-05,
"loss": 3.6367,
"step": 500
},
{
"epoch": 6.0,
"eval_accuracy": 0.12690605017215936,
"eval_loss": 3.4777231216430664,
"eval_runtime": 16.8395,
"eval_samples_per_second": 120.728,
"eval_steps_per_second": 0.95,
"step": 540
},
{
"epoch": 6.11,
"learning_rate": 9.178130511463845e-05,
"loss": 3.5853,
"step": 550
},
{
"epoch": 6.67,
"learning_rate": 9.001763668430336e-05,
"loss": 3.4016,
"step": 600
},
{
"epoch": 7.0,
"eval_accuracy": 0.21446138711264143,
"eval_loss": 3.1104345321655273,
"eval_runtime": 16.8901,
"eval_samples_per_second": 120.366,
"eval_steps_per_second": 0.947,
"step": 630
},
{
"epoch": 7.22,
"learning_rate": 8.825396825396825e-05,
"loss": 3.213,
"step": 650
},
{
"epoch": 7.78,
"learning_rate": 8.649029982363316e-05,
"loss": 2.946,
"step": 700
},
{
"epoch": 8.0,
"eval_accuracy": 0.31234628627643873,
"eval_loss": 2.6535961627960205,
"eval_runtime": 16.8793,
"eval_samples_per_second": 120.444,
"eval_steps_per_second": 0.948,
"step": 720
},
{
"epoch": 8.33,
"learning_rate": 8.472663139329807e-05,
"loss": 2.7358,
"step": 750
},
{
"epoch": 8.89,
"learning_rate": 8.296296296296296e-05,
"loss": 2.514,
"step": 800
},
{
"epoch": 9.0,
"eval_accuracy": 0.4176094441711756,
"eval_loss": 2.3149795532226562,
"eval_runtime": 16.8771,
"eval_samples_per_second": 120.459,
"eval_steps_per_second": 0.948,
"step": 810
},
{
"epoch": 9.44,
"learning_rate": 8.119929453262787e-05,
"loss": 2.3267,
"step": 850
},
{
"epoch": 10.0,
"learning_rate": 7.947089947089948e-05,
"loss": 2.163,
"step": 900
},
{
"epoch": 10.0,
"eval_accuracy": 0.48106246925725527,
"eval_loss": 2.0448246002197266,
"eval_runtime": 16.8844,
"eval_samples_per_second": 120.407,
"eval_steps_per_second": 0.948,
"step": 900
},
{
"epoch": 10.56,
"learning_rate": 7.770723104056437e-05,
"loss": 1.947,
"step": 950
},
{
"epoch": 11.0,
"eval_accuracy": 0.5700934579439252,
"eval_loss": 1.7913175821304321,
"eval_runtime": 16.8279,
"eval_samples_per_second": 120.811,
"eval_steps_per_second": 0.951,
"step": 990
},
{
"epoch": 11.11,
"learning_rate": 7.594356261022928e-05,
"loss": 1.8568,
"step": 1000
},
{
"epoch": 11.67,
"learning_rate": 7.417989417989419e-05,
"loss": 1.7119,
"step": 1050
},
{
"epoch": 12.0,
"eval_accuracy": 0.6242006886374816,
"eval_loss": 1.616714596748352,
"eval_runtime": 16.8545,
"eval_samples_per_second": 120.62,
"eval_steps_per_second": 0.949,
"step": 1080
},
{
"epoch": 12.22,
"learning_rate": 7.241622574955908e-05,
"loss": 1.5846,
"step": 1100
},
{
"epoch": 12.78,
"learning_rate": 7.065255731922399e-05,
"loss": 1.4849,
"step": 1150
},
{
"epoch": 13.0,
"eval_accuracy": 0.6620757501229709,
"eval_loss": 1.5006158351898193,
"eval_runtime": 16.8739,
"eval_samples_per_second": 120.482,
"eval_steps_per_second": 0.948,
"step": 1170
},
{
"epoch": 13.33,
"learning_rate": 6.88888888888889e-05,
"loss": 1.3323,
"step": 1200
},
{
"epoch": 13.89,
"learning_rate": 6.712522045855379e-05,
"loss": 1.3281,
"step": 1250
},
{
"epoch": 14.0,
"eval_accuracy": 0.6699458927693065,
"eval_loss": 1.4369958639144897,
"eval_runtime": 16.7964,
"eval_samples_per_second": 121.038,
"eval_steps_per_second": 0.953,
"step": 1260
},
{
"epoch": 14.44,
"learning_rate": 6.53615520282187e-05,
"loss": 1.1692,
"step": 1300
},
{
"epoch": 15.0,
"learning_rate": 6.35978835978836e-05,
"loss": 1.1518,
"step": 1350
},
{
"epoch": 15.0,
"eval_accuracy": 0.6984751598622725,
"eval_loss": 1.3894743919372559,
"eval_runtime": 16.9538,
"eval_samples_per_second": 119.914,
"eval_steps_per_second": 0.944,
"step": 1350
},
{
"epoch": 15.56,
"learning_rate": 6.183421516754851e-05,
"loss": 0.9798,
"step": 1400
},
{
"epoch": 16.0,
"eval_accuracy": 0.7329070339399901,
"eval_loss": 1.3057465553283691,
"eval_runtime": 16.8273,
"eval_samples_per_second": 120.816,
"eval_steps_per_second": 0.951,
"step": 1440
},
{
"epoch": 16.11,
"learning_rate": 6.0070546737213404e-05,
"loss": 1.0168,
"step": 1450
},
{
"epoch": 16.67,
"learning_rate": 5.830687830687831e-05,
"loss": 0.9296,
"step": 1500
},
{
"epoch": 17.0,
"eval_accuracy": 0.7353664535169699,
"eval_loss": 1.2697882652282715,
"eval_runtime": 16.8455,
"eval_samples_per_second": 120.685,
"eval_steps_per_second": 0.95,
"step": 1530
},
{
"epoch": 17.22,
"learning_rate": 5.654320987654321e-05,
"loss": 0.8497,
"step": 1550
},
{
"epoch": 17.78,
"learning_rate": 5.477954144620812e-05,
"loss": 0.7886,
"step": 1600
},
{
"epoch": 18.0,
"eval_accuracy": 0.750122970978849,
"eval_loss": 1.2826693058013916,
"eval_runtime": 16.966,
"eval_samples_per_second": 119.828,
"eval_steps_per_second": 0.943,
"step": 1620
},
{
"epoch": 18.33,
"learning_rate": 5.301587301587302e-05,
"loss": 0.7637,
"step": 1650
},
{
"epoch": 18.89,
"learning_rate": 5.125220458553792e-05,
"loss": 0.7191,
"step": 1700
},
{
"epoch": 19.0,
"eval_accuracy": 0.749139203148057,
"eval_loss": 1.3038619756698608,
"eval_runtime": 16.7983,
"eval_samples_per_second": 121.024,
"eval_steps_per_second": 0.952,
"step": 1710
},
{
"epoch": 19.44,
"learning_rate": 4.948853615520283e-05,
"loss": 0.6952,
"step": 1750
},
{
"epoch": 20.0,
"learning_rate": 4.772486772486773e-05,
"loss": 0.609,
"step": 1800
},
{
"epoch": 20.0,
"eval_accuracy": 0.7594687653713723,
"eval_loss": 1.2997246980667114,
"eval_runtime": 16.7476,
"eval_samples_per_second": 121.39,
"eval_steps_per_second": 0.955,
"step": 1800
},
{
"epoch": 20.56,
"learning_rate": 4.596119929453263e-05,
"loss": 0.5703,
"step": 1850
},
{
"epoch": 21.0,
"eval_accuracy": 0.76586325627152,
"eval_loss": 1.2955206632614136,
"eval_runtime": 16.7956,
"eval_samples_per_second": 121.044,
"eval_steps_per_second": 0.953,
"step": 1890
},
{
"epoch": 21.11,
"learning_rate": 4.4197530864197535e-05,
"loss": 0.5608,
"step": 1900
},
{
"epoch": 21.67,
"learning_rate": 4.2433862433862435e-05,
"loss": 0.5251,
"step": 1950
},
{
"epoch": 22.0,
"eval_accuracy": 0.7614363010329562,
"eval_loss": 1.3140268325805664,
"eval_runtime": 16.7643,
"eval_samples_per_second": 121.27,
"eval_steps_per_second": 0.954,
"step": 1980
},
{
"epoch": 22.22,
"learning_rate": 4.067019400352734e-05,
"loss": 0.5351,
"step": 2000
},
{
"epoch": 22.78,
"learning_rate": 3.890652557319224e-05,
"loss": 0.5041,
"step": 2050
},
{
"epoch": 23.0,
"eval_accuracy": 0.7668470241023119,
"eval_loss": 1.319765567779541,
"eval_runtime": 16.8549,
"eval_samples_per_second": 120.618,
"eval_steps_per_second": 0.949,
"step": 2070
},
{
"epoch": 23.33,
"learning_rate": 3.717813051146385e-05,
"loss": 0.435,
"step": 2100
},
{
"epoch": 23.89,
"learning_rate": 3.5414462081128755e-05,
"loss": 0.4455,
"step": 2150
},
{
"epoch": 24.0,
"eval_accuracy": 0.7816035415641909,
"eval_loss": 1.3251405954360962,
"eval_runtime": 16.7377,
"eval_samples_per_second": 121.462,
"eval_steps_per_second": 0.956,
"step": 2160
},
{
"epoch": 24.44,
"learning_rate": 3.3650793650793656e-05,
"loss": 0.4152,
"step": 2200
},
{
"epoch": 25.0,
"learning_rate": 3.1887125220458556e-05,
"loss": 0.4046,
"step": 2250
},
{
"epoch": 25.0,
"eval_accuracy": 0.7825873093949828,
"eval_loss": 1.3233106136322021,
"eval_runtime": 16.7729,
"eval_samples_per_second": 121.207,
"eval_steps_per_second": 0.954,
"step": 2250
},
{
"epoch": 25.56,
"learning_rate": 3.012345679012346e-05,
"loss": 0.3861,
"step": 2300
},
{
"epoch": 26.0,
"eval_accuracy": 0.7737333989178554,
"eval_loss": 1.3713818788528442,
"eval_runtime": 16.8666,
"eval_samples_per_second": 120.534,
"eval_steps_per_second": 0.949,
"step": 2340
},
{
"epoch": 26.11,
"learning_rate": 2.8359788359788357e-05,
"loss": 0.3571,
"step": 2350
},
{
"epoch": 26.67,
"learning_rate": 2.659611992945326e-05,
"loss": 0.3798,
"step": 2400
},
{
"epoch": 27.0,
"eval_accuracy": 0.7747171667486473,
"eval_loss": 1.3577079772949219,
"eval_runtime": 16.7848,
"eval_samples_per_second": 121.122,
"eval_steps_per_second": 0.953,
"step": 2430
},
{
"epoch": 27.22,
"learning_rate": 2.4832451499118168e-05,
"loss": 0.3686,
"step": 2450
},
{
"epoch": 27.78,
"learning_rate": 2.3068783068783072e-05,
"loss": 0.3402,
"step": 2500
},
{
"epoch": 28.0,
"eval_accuracy": 0.7757009345794392,
"eval_loss": 1.3933676481246948,
"eval_runtime": 16.7209,
"eval_samples_per_second": 121.585,
"eval_steps_per_second": 0.957,
"step": 2520
},
{
"epoch": 28.33,
"learning_rate": 2.1305114638447972e-05,
"loss": 0.3556,
"step": 2550
},
{
"epoch": 28.89,
"learning_rate": 1.9541446208112876e-05,
"loss": 0.2951,
"step": 2600
},
{
"epoch": 29.0,
"eval_accuracy": 0.779636005902607,
"eval_loss": 1.3655041456222534,
"eval_runtime": 16.7839,
"eval_samples_per_second": 121.128,
"eval_steps_per_second": 0.953,
"step": 2610
},
{
"epoch": 29.44,
"learning_rate": 1.777777777777778e-05,
"loss": 0.3126,
"step": 2650
},
{
"epoch": 30.0,
"learning_rate": 1.6014109347442684e-05,
"loss": 0.2953,
"step": 2700
},
{
"epoch": 30.0,
"eval_accuracy": 0.7776684702410231,
"eval_loss": 1.3812841176986694,
"eval_runtime": 16.7955,
"eval_samples_per_second": 121.044,
"eval_steps_per_second": 0.953,
"step": 2700
},
{
"epoch": 30.56,
"learning_rate": 1.4250440917107586e-05,
"loss": 0.3125,
"step": 2750
},
{
"epoch": 31.0,
"eval_accuracy": 0.7855386128873586,
"eval_loss": 1.3787145614624023,
"eval_runtime": 16.8143,
"eval_samples_per_second": 120.909,
"eval_steps_per_second": 0.952,
"step": 2790
},
{
"epoch": 31.11,
"learning_rate": 1.2486772486772486e-05,
"loss": 0.2608,
"step": 2800
},
{
"epoch": 31.67,
"learning_rate": 1.072310405643739e-05,
"loss": 0.2564,
"step": 2850
},
{
"epoch": 32.0,
"eval_accuracy": 0.778652238071815,
"eval_loss": 1.3935987949371338,
"eval_runtime": 16.7896,
"eval_samples_per_second": 121.087,
"eval_steps_per_second": 0.953,
"step": 2880
},
{
"epoch": 32.22,
"learning_rate": 8.959435626102292e-06,
"loss": 0.2534,
"step": 2900
},
{
"epoch": 32.78,
"learning_rate": 7.195767195767196e-06,
"loss": 0.2427,
"step": 2950
},
{
"epoch": 33.0,
"eval_accuracy": 0.7855386128873586,
"eval_loss": 1.388238787651062,
"eval_runtime": 16.8064,
"eval_samples_per_second": 120.966,
"eval_steps_per_second": 0.952,
"step": 2970
},
{
"epoch": 33.33,
"learning_rate": 5.432098765432099e-06,
"loss": 0.2691,
"step": 3000
},
{
"epoch": 33.89,
"learning_rate": 3.668430335097002e-06,
"loss": 0.279,
"step": 3050
},
{
"epoch": 34.0,
"eval_accuracy": 0.7855386128873586,
"eval_loss": 1.4113564491271973,
"eval_runtime": 16.7799,
"eval_samples_per_second": 121.157,
"eval_steps_per_second": 0.954,
"step": 3060
},
{
"epoch": 34.44,
"learning_rate": 1.9047619047619051e-06,
"loss": 0.2582,
"step": 3100
},
{
"epoch": 35.0,
"learning_rate": 1.4109347442680778e-07,
"loss": 0.2308,
"step": 3150
},
{
"epoch": 35.0,
"eval_accuracy": 0.7855386128873586,
"eval_loss": 1.4130984544754028,
"eval_runtime": 16.8643,
"eval_samples_per_second": 120.551,
"eval_steps_per_second": 0.949,
"step": 3150
}
],
"max_steps": 3150,
"num_train_epochs": 35,
"total_flos": 1.221413841164159e+20,
"trial_name": null,
"trial_params": null
}