{
  "best_metric": 0.042669691145420074,
  "best_model_checkpoint": "miner_id_24/checkpoint-1200",
  "epoch": 1.3592599584670568,
  "eval_steps": 200,
  "global_step": 1800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0007551444213705871,
      "eval_loss": 0.3643783926963806,
      "eval_runtime": 87.2468,
      "eval_samples_per_second": 12.791,
      "eval_steps_per_second": 3.198,
      "step": 1
    },
    {
      "epoch": 0.037757221068529356,
      "grad_norm": 1.4100414514541626,
      "learning_rate": 5e-05,
      "loss": 0.1364,
      "step": 50
    },
    {
      "epoch": 0.07551444213705871,
      "grad_norm": 0.6465395092964172,
      "learning_rate": 0.0001,
      "loss": 0.0635,
      "step": 100
    },
    {
      "epoch": 0.11327166320558807,
      "grad_norm": 1.6140310764312744,
      "learning_rate": 9.98292246503335e-05,
      "loss": 0.047,
      "step": 150
    },
    {
      "epoch": 0.15102888427411743,
      "grad_norm": 2.0268421173095703,
      "learning_rate": 9.931806517013612e-05,
      "loss": 0.0477,
      "step": 200
    },
    {
      "epoch": 0.15102888427411743,
      "eval_loss": 0.05864099785685539,
      "eval_runtime": 88.7167,
      "eval_samples_per_second": 12.579,
      "eval_steps_per_second": 3.145,
      "step": 200
    },
    {
      "epoch": 0.18878610534264678,
      "grad_norm": 1.6122969388961792,
      "learning_rate": 9.847001329696653e-05,
      "loss": 0.0468,
      "step": 250
    },
    {
      "epoch": 0.22654332641117614,
      "grad_norm": 0.023569952696561813,
      "learning_rate": 9.729086208503174e-05,
      "loss": 0.0393,
      "step": 300
    },
    {
      "epoch": 0.2643005474797055,
      "grad_norm": 0.6391419172286987,
      "learning_rate": 9.578866633275288e-05,
      "loss": 0.044,
      "step": 350
    },
    {
      "epoch": 0.30205776854823485,
      "grad_norm": 0.2637801766395569,
      "learning_rate": 9.397368756032445e-05,
      "loss": 0.0349,
      "step": 400
    },
    {
      "epoch": 0.30205776854823485,
      "eval_loss": 0.048699330538511276,
      "eval_runtime": 88.8453,
      "eval_samples_per_second": 12.561,
      "eval_steps_per_second": 3.14,
      "step": 400
    },
    {
      "epoch": 0.3398149896167642,
      "grad_norm": 1.7430977821350098,
      "learning_rate": 9.185832391312644e-05,
      "loss": 0.0443,
      "step": 450
    },
    {
      "epoch": 0.37757221068529356,
      "grad_norm": 0.3382541835308075,
      "learning_rate": 8.945702546981969e-05,
      "loss": 0.0392,
      "step": 500
    },
    {
      "epoch": 0.4153294317538229,
      "grad_norm": 1.5577205419540405,
      "learning_rate": 8.678619553365659e-05,
      "loss": 0.0361,
      "step": 550
    },
    {
      "epoch": 0.4530866528223523,
      "grad_norm": 0.838959276676178,
      "learning_rate": 8.386407858128706e-05,
      "loss": 0.0334,
      "step": 600
    },
    {
      "epoch": 0.4530866528223523,
      "eval_loss": 0.05116293579339981,
      "eval_runtime": 88.8487,
      "eval_samples_per_second": 12.561,
      "eval_steps_per_second": 3.14,
      "step": 600
    },
    {
      "epoch": 0.49084387389088163,
      "grad_norm": 1.6610183715820312,
      "learning_rate": 8.07106356344834e-05,
      "loss": 0.0422,
      "step": 650
    },
    {
      "epoch": 0.528601094959411,
      "grad_norm": 1.1309680938720703,
      "learning_rate": 7.734740790612136e-05,
      "loss": 0.0348,
      "step": 700
    },
    {
      "epoch": 0.5663583160279403,
      "grad_norm": 0.7380091547966003,
      "learning_rate": 7.379736965185368e-05,
      "loss": 0.0404,
      "step": 750
    },
    {
      "epoch": 0.6041155370964697,
      "grad_norm": 0.2660733461380005,
      "learning_rate": 7.008477123264848e-05,
      "loss": 0.0444,
      "step": 800
    },
    {
      "epoch": 0.6041155370964697,
      "eval_loss": 0.04871406406164169,
      "eval_runtime": 88.864,
      "eval_samples_per_second": 12.559,
      "eval_steps_per_second": 3.14,
      "step": 800
    },
    {
      "epoch": 0.641872758164999,
      "grad_norm": 0.5725349187850952,
      "learning_rate": 6.623497346023418e-05,
      "loss": 0.0347,
      "step": 850
    },
    {
      "epoch": 0.6796299792335284,
      "grad_norm": 0.9907957911491394,
      "learning_rate": 6.227427435703997e-05,
      "loss": 0.035,
      "step": 900
    },
    {
      "epoch": 0.7173872003020577,
      "grad_norm": 0.7868975400924683,
      "learning_rate": 5.8229729514036705e-05,
      "loss": 0.0344,
      "step": 950
    },
    {
      "epoch": 0.7551444213705871,
      "grad_norm": 2.8851935863494873,
      "learning_rate": 5.4128967273616625e-05,
      "loss": 0.0365,
      "step": 1000
    },
    {
      "epoch": 0.7551444213705871,
      "eval_loss": 0.0462164580821991,
      "eval_runtime": 88.9886,
      "eval_samples_per_second": 12.541,
      "eval_steps_per_second": 3.135,
      "step": 1000
    },
    {
      "epoch": 0.7929016424391164,
      "grad_norm": 0.26174625754356384,
      "learning_rate": 5e-05,
      "loss": 0.0385,
      "step": 1050
    },
    {
      "epoch": 0.8306588635076458,
      "grad_norm": 0.055037304759025574,
      "learning_rate": 4.5871032726383386e-05,
      "loss": 0.039,
      "step": 1100
    },
    {
      "epoch": 0.8684160845761751,
      "grad_norm": 0.029556207358837128,
      "learning_rate": 4.17702704859633e-05,
      "loss": 0.0302,
      "step": 1150
    },
    {
      "epoch": 0.9061733056447046,
      "grad_norm": 0.44684645533561707,
      "learning_rate": 3.772572564296005e-05,
      "loss": 0.0365,
      "step": 1200
    },
    {
      "epoch": 0.9061733056447046,
      "eval_loss": 0.042669691145420074,
      "eval_runtime": 88.4572,
      "eval_samples_per_second": 12.616,
      "eval_steps_per_second": 3.154,
      "step": 1200
    },
    {
      "epoch": 0.9439305267132339,
      "grad_norm": 0.031862616539001465,
      "learning_rate": 3.3765026539765834e-05,
      "loss": 0.0312,
      "step": 1250
    },
    {
      "epoch": 0.9816877477817633,
      "grad_norm": 0.028305215761065483,
      "learning_rate": 2.991522876735154e-05,
      "loss": 0.0333,
      "step": 1300
    },
    {
      "epoch": 1.0194449688502927,
      "grad_norm": 0.16851945221424103,
      "learning_rate": 2.6202630348146324e-05,
      "loss": 0.0341,
      "step": 1350
    },
    {
      "epoch": 1.057202189918822,
      "grad_norm": 0.09734618663787842,
      "learning_rate": 2.2652592093878666e-05,
      "loss": 0.0196,
      "step": 1400
    },
    {
      "epoch": 1.057202189918822,
      "eval_loss": 0.04361049458384514,
      "eval_runtime": 88.6046,
      "eval_samples_per_second": 12.595,
      "eval_steps_per_second": 3.149,
      "step": 1400
    },
    {
      "epoch": 1.0949594109873513,
      "grad_norm": 0.022358063608407974,
      "learning_rate": 1.928936436551661e-05,
      "loss": 0.015,
      "step": 1450
    },
    {
      "epoch": 1.1327166320558808,
      "grad_norm": 0.23758022487163544,
      "learning_rate": 1.6135921418712956e-05,
      "loss": 0.0156,
      "step": 1500
    },
    {
      "epoch": 1.17047385312441,
      "grad_norm": 0.011663041077554226,
      "learning_rate": 1.3213804466343421e-05,
      "loss": 0.0158,
      "step": 1550
    },
    {
      "epoch": 1.2082310741929394,
      "grad_norm": 0.0888688713312149,
      "learning_rate": 1.0542974530180327e-05,
      "loss": 0.0192,
      "step": 1600
    },
    {
      "epoch": 1.2082310741929394,
      "eval_loss": 0.04343624785542488,
      "eval_runtime": 88.893,
      "eval_samples_per_second": 12.554,
      "eval_steps_per_second": 3.139,
      "step": 1600
    },
    {
      "epoch": 1.2459882952614687,
      "grad_norm": 0.28665322065353394,
      "learning_rate": 8.141676086873572e-06,
      "loss": 0.0139,
      "step": 1650
    },
    {
      "epoch": 1.283745516329998,
      "grad_norm": 0.026126619428396225,
      "learning_rate": 6.026312439675552e-06,
      "loss": 0.0178,
      "step": 1700
    },
    {
      "epoch": 1.3215027373985275,
      "grad_norm": 0.12513647973537445,
      "learning_rate": 4.2113336672471245e-06,
      "loss": 0.0157,
      "step": 1750
    },
    {
      "epoch": 1.3592599584670568,
      "grad_norm": 0.0799918845295906,
      "learning_rate": 2.7091379149682685e-06,
      "loss": 0.0138,
      "step": 1800
    },
    {
      "epoch": 1.3592599584670568,
      "eval_loss": 0.043415576219558716,
      "eval_runtime": 88.8821,
      "eval_samples_per_second": 12.556,
      "eval_steps_per_second": 3.139,
      "step": 1800
    }
  ],
  "logging_steps": 50,
  "max_steps": 2000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 200,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 3
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.2271277242822164e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}