| { | |
| "best_metric": 0.19081147015094757, | |
| "best_model_checkpoint": "Resneteau-50-2024_09_23-batch-size32_freeze/checkpoint-4914", | |
| "epoch": 28.0, | |
| "eval_steps": 500, | |
| "global_step": 7644, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.08766458766458766, | |
| "eval_f1_macro": 0.226738844317642, | |
| "eval_f1_micro": 0.5801698557249565, | |
| "eval_loss": 0.24598382413387299, | |
| "eval_runtime": 416.4206, | |
| "eval_samples_per_second": 6.93, | |
| "eval_steps_per_second": 0.219, | |
| "learning_rate": 0.001, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.8315018315018317, | |
| "grad_norm": 0.0508086197078228, | |
| "learning_rate": 0.001, | |
| "loss": 0.2786, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.13686763686763687, | |
| "eval_f1_macro": 0.3160165508599939, | |
| "eval_f1_micro": 0.6411905904944791, | |
| "eval_loss": 0.22168199717998505, | |
| "eval_runtime": 395.166, | |
| "eval_samples_per_second": 7.303, | |
| "eval_steps_per_second": 0.23, | |
| "learning_rate": 0.001, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.14864864864864866, | |
| "eval_f1_macro": 0.3580673052862397, | |
| "eval_f1_micro": 0.6595584072466503, | |
| "eval_loss": 0.21166761219501495, | |
| "eval_runtime": 401.6658, | |
| "eval_samples_per_second": 7.185, | |
| "eval_steps_per_second": 0.227, | |
| "learning_rate": 0.001, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 3.663003663003663, | |
| "grad_norm": 0.04649221897125244, | |
| "learning_rate": 0.001, | |
| "loss": 0.231, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.16181566181566182, | |
| "eval_f1_macro": 0.3831121485565155, | |
| "eval_f1_micro": 0.6673936750272628, | |
| "eval_loss": 0.20492619276046753, | |
| "eval_runtime": 411.5182, | |
| "eval_samples_per_second": 7.013, | |
| "eval_steps_per_second": 0.221, | |
| "learning_rate": 0.001, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.1677061677061677, | |
| "eval_f1_macro": 0.3964602797407069, | |
| "eval_f1_micro": 0.6707461695365495, | |
| "eval_loss": 0.20162147283554077, | |
| "eval_runtime": 414.4686, | |
| "eval_samples_per_second": 6.963, | |
| "eval_steps_per_second": 0.22, | |
| "learning_rate": 0.001, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 5.4945054945054945, | |
| "grad_norm": 0.03698631748557091, | |
| "learning_rate": 0.001, | |
| "loss": 0.2206, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.1677061677061677, | |
| "eval_f1_macro": 0.40758628553731013, | |
| "eval_f1_micro": 0.6719734660033168, | |
| "eval_loss": 0.20019273459911346, | |
| "eval_runtime": 422.4235, | |
| "eval_samples_per_second": 6.832, | |
| "eval_steps_per_second": 0.215, | |
| "learning_rate": 0.001, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.17463617463617465, | |
| "eval_f1_macro": 0.4142080471846538, | |
| "eval_f1_micro": 0.6751762240426747, | |
| "eval_loss": 0.19761690497398376, | |
| "eval_runtime": 419.1011, | |
| "eval_samples_per_second": 6.886, | |
| "eval_steps_per_second": 0.217, | |
| "learning_rate": 0.001, | |
| "step": 1911 | |
| }, | |
| { | |
| "epoch": 7.326007326007326, | |
| "grad_norm": 0.05039607360959053, | |
| "learning_rate": 0.001, | |
| "loss": 0.2157, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.17636867636867637, | |
| "eval_f1_macro": 0.42809095916498113, | |
| "eval_f1_micro": 0.6823529411764706, | |
| "eval_loss": 0.19706940650939941, | |
| "eval_runtime": 419.7988, | |
| "eval_samples_per_second": 6.875, | |
| "eval_steps_per_second": 0.217, | |
| "learning_rate": 0.001, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.17636867636867637, | |
| "eval_f1_macro": 0.43000179684162393, | |
| "eval_f1_micro": 0.6844589857443328, | |
| "eval_loss": 0.19613835215568542, | |
| "eval_runtime": 418.2306, | |
| "eval_samples_per_second": 6.9, | |
| "eval_steps_per_second": 0.218, | |
| "learning_rate": 0.001, | |
| "step": 2457 | |
| }, | |
| { | |
| "epoch": 9.157509157509157, | |
| "grad_norm": 0.05213358625769615, | |
| "learning_rate": 0.001, | |
| "loss": 0.2127, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.18052668052668053, | |
| "eval_f1_macro": 0.4264062108185488, | |
| "eval_f1_micro": 0.676261056657901, | |
| "eval_loss": 0.19443827867507935, | |
| "eval_runtime": 412.2909, | |
| "eval_samples_per_second": 7.0, | |
| "eval_steps_per_second": 0.221, | |
| "learning_rate": 0.001, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 10.989010989010989, | |
| "grad_norm": 0.035016052424907684, | |
| "learning_rate": 0.001, | |
| "loss": 0.2117, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.1781011781011781, | |
| "eval_f1_macro": 0.43914447135579204, | |
| "eval_f1_micro": 0.6902341199514971, | |
| "eval_loss": 0.19399969279766083, | |
| "eval_runtime": 415.1772, | |
| "eval_samples_per_second": 6.951, | |
| "eval_steps_per_second": 0.219, | |
| "learning_rate": 0.001, | |
| "step": 3003 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.1729036729036729, | |
| "eval_f1_macro": 0.45234247782022446, | |
| "eval_f1_micro": 0.6938511326860841, | |
| "eval_loss": 0.19451384246349335, | |
| "eval_runtime": 421.6946, | |
| "eval_samples_per_second": 6.844, | |
| "eval_steps_per_second": 0.216, | |
| "learning_rate": 0.001, | |
| "step": 3276 | |
| }, | |
| { | |
| "epoch": 12.820512820512821, | |
| "grad_norm": 0.051621340215206146, | |
| "learning_rate": 0.001, | |
| "loss": 0.2107, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.1794871794871795, | |
| "eval_f1_macro": 0.44605482120784584, | |
| "eval_f1_micro": 0.6907971453892439, | |
| "eval_loss": 0.19363747537136078, | |
| "eval_runtime": 400.266, | |
| "eval_samples_per_second": 7.21, | |
| "eval_steps_per_second": 0.227, | |
| "learning_rate": 0.001, | |
| "step": 3549 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.1781011781011781, | |
| "eval_f1_macro": 0.44244925103284655, | |
| "eval_f1_micro": 0.6916442548455903, | |
| "eval_loss": 0.1931454837322235, | |
| "eval_runtime": 399.4345, | |
| "eval_samples_per_second": 7.225, | |
| "eval_steps_per_second": 0.228, | |
| "learning_rate": 0.001, | |
| "step": 3822 | |
| }, | |
| { | |
| "epoch": 14.652014652014651, | |
| "grad_norm": 0.044662874191999435, | |
| "learning_rate": 0.001, | |
| "loss": 0.2105, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.18087318087318088, | |
| "eval_f1_macro": 0.44307178033824657, | |
| "eval_f1_micro": 0.6936180088187515, | |
| "eval_loss": 0.1935158371925354, | |
| "eval_runtime": 402.2391, | |
| "eval_samples_per_second": 7.175, | |
| "eval_steps_per_second": 0.226, | |
| "learning_rate": 0.001, | |
| "step": 4095 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.18052668052668053, | |
| "eval_f1_macro": 0.4428841041517678, | |
| "eval_f1_micro": 0.6895936942854461, | |
| "eval_loss": 0.19309590756893158, | |
| "eval_runtime": 394.2567, | |
| "eval_samples_per_second": 7.32, | |
| "eval_steps_per_second": 0.231, | |
| "learning_rate": 0.001, | |
| "step": 4368 | |
| }, | |
| { | |
| "epoch": 16.483516483516482, | |
| "grad_norm": 0.041027914732694626, | |
| "learning_rate": 0.001, | |
| "loss": 0.2086, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.18191268191268192, | |
| "eval_f1_macro": 0.4411042424961882, | |
| "eval_f1_micro": 0.6953186376449928, | |
| "eval_loss": 0.19311168789863586, | |
| "eval_runtime": 406.8435, | |
| "eval_samples_per_second": 7.094, | |
| "eval_steps_per_second": 0.224, | |
| "learning_rate": 0.001, | |
| "step": 4641 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.18572418572418573, | |
| "eval_f1_macro": 0.4490480976278912, | |
| "eval_f1_micro": 0.6983818770226538, | |
| "eval_loss": 0.19081147015094757, | |
| "eval_runtime": 398.4396, | |
| "eval_samples_per_second": 7.243, | |
| "eval_steps_per_second": 0.228, | |
| "learning_rate": 0.001, | |
| "step": 4914 | |
| }, | |
| { | |
| "epoch": 18.315018315018314, | |
| "grad_norm": 0.05783214792609215, | |
| "learning_rate": 0.001, | |
| "loss": 0.2101, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.1812196812196812, | |
| "eval_f1_macro": 0.4428453523216445, | |
| "eval_f1_micro": 0.6878854936673101, | |
| "eval_loss": 0.19249168038368225, | |
| "eval_runtime": 397.6758, | |
| "eval_samples_per_second": 7.257, | |
| "eval_steps_per_second": 0.229, | |
| "learning_rate": 0.001, | |
| "step": 5187 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.1774081774081774, | |
| "eval_f1_macro": 0.43568338344914237, | |
| "eval_f1_micro": 0.6796580216840999, | |
| "eval_loss": 0.19134406745433807, | |
| "eval_runtime": 404.3345, | |
| "eval_samples_per_second": 7.138, | |
| "eval_steps_per_second": 0.225, | |
| "learning_rate": 0.001, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 20.146520146520146, | |
| "grad_norm": 0.04509862884879112, | |
| "learning_rate": 0.001, | |
| "loss": 0.2088, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.18225918225918225, | |
| "eval_f1_macro": 0.4381469652060519, | |
| "eval_f1_micro": 0.6957772621809745, | |
| "eval_loss": 0.19149190187454224, | |
| "eval_runtime": 403.3745, | |
| "eval_samples_per_second": 7.155, | |
| "eval_steps_per_second": 0.226, | |
| "learning_rate": 0.001, | |
| "step": 5733 | |
| }, | |
| { | |
| "epoch": 21.978021978021978, | |
| "grad_norm": 0.04410397261381149, | |
| "learning_rate": 0.001, | |
| "loss": 0.2084, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.1826056826056826, | |
| "eval_f1_macro": 0.4534807464842353, | |
| "eval_f1_micro": 0.7038712011577424, | |
| "eval_loss": 0.19192616641521454, | |
| "eval_runtime": 394.3882, | |
| "eval_samples_per_second": 7.318, | |
| "eval_steps_per_second": 0.231, | |
| "learning_rate": 0.001, | |
| "step": 6006 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.17983367983367984, | |
| "eval_f1_macro": 0.4363028843794499, | |
| "eval_f1_micro": 0.6907461850762985, | |
| "eval_loss": 0.19255639612674713, | |
| "eval_runtime": 404.3195, | |
| "eval_samples_per_second": 7.138, | |
| "eval_steps_per_second": 0.225, | |
| "learning_rate": 0.001, | |
| "step": 6279 | |
| }, | |
| { | |
| "epoch": 23.80952380952381, | |
| "grad_norm": 0.0686459094285965, | |
| "learning_rate": 0.001, | |
| "loss": 0.2083, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.18052668052668053, | |
| "eval_f1_macro": 0.45443118252910614, | |
| "eval_f1_micro": 0.6952745610758312, | |
| "eval_loss": 0.19186602532863617, | |
| "eval_runtime": 397.0622, | |
| "eval_samples_per_second": 7.268, | |
| "eval_steps_per_second": 0.229, | |
| "learning_rate": 0.001, | |
| "step": 6552 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.1781011781011781, | |
| "eval_f1_macro": 0.4465566917300777, | |
| "eval_f1_micro": 0.6961779911373708, | |
| "eval_loss": 0.19193170964717865, | |
| "eval_runtime": 397.2293, | |
| "eval_samples_per_second": 7.265, | |
| "eval_steps_per_second": 0.229, | |
| "learning_rate": 0.0001, | |
| "step": 6825 | |
| }, | |
| { | |
| "epoch": 25.641025641025642, | |
| "grad_norm": 0.04814450815320015, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2076, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.18225918225918225, | |
| "eval_f1_macro": 0.441825214268795, | |
| "eval_f1_micro": 0.6942802624842929, | |
| "eval_loss": 0.19118554890155792, | |
| "eval_runtime": 398.0912, | |
| "eval_samples_per_second": 7.25, | |
| "eval_steps_per_second": 0.229, | |
| "learning_rate": 0.0001, | |
| "step": 7098 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.18087318087318088, | |
| "eval_f1_macro": 0.449975636684123, | |
| "eval_f1_micro": 0.6971996137398262, | |
| "eval_loss": 0.19123922288417816, | |
| "eval_runtime": 398.8223, | |
| "eval_samples_per_second": 7.236, | |
| "eval_steps_per_second": 0.228, | |
| "learning_rate": 0.0001, | |
| "step": 7371 | |
| }, | |
| { | |
| "epoch": 27.47252747252747, | |
| "grad_norm": 0.05590255931019783, | |
| "learning_rate": 0.0001, | |
| "loss": 0.2081, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.18572418572418573, | |
| "eval_f1_macro": 0.44543509037683293, | |
| "eval_f1_micro": 0.6943913469159402, | |
| "eval_loss": 0.19151046872138977, | |
| "eval_runtime": 405.2259, | |
| "eval_samples_per_second": 7.122, | |
| "eval_steps_per_second": 0.225, | |
| "learning_rate": 0.0001, | |
| "step": 7644 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "learning_rate": 0.0001, | |
| "step": 7644, | |
| "total_flos": 2.778404267780425e+19, | |
| "train_loss": 0.2165746406882549, | |
| "train_runtime": 45987.1682, | |
| "train_samples_per_second": 75.812, | |
| "train_steps_per_second": 2.375 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 109200, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 400, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 10, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.778404267780425e+19, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |