| { | |
| "best_metric": 0.8688188791275024, | |
| "best_model_checkpoint": "miner_id_24/checkpoint-50", | |
| "epoch": 0.007490356166435714, | |
| "eval_steps": 50, | |
| "global_step": 50, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00014980712332871428, | |
| "grad_norm": 0.6298632621765137, | |
| "learning_rate": 1e-05, | |
| "loss": 0.8866, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.00014980712332871428, | |
| "eval_loss": 1.264488935470581, | |
| "eval_runtime": 565.4748, | |
| "eval_samples_per_second": 19.882, | |
| "eval_steps_per_second": 4.971, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.00029961424665742855, | |
| "grad_norm": 0.46446141600608826, | |
| "learning_rate": 2e-05, | |
| "loss": 0.7634, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.00044942136998614283, | |
| "grad_norm": 0.8355027437210083, | |
| "learning_rate": 3e-05, | |
| "loss": 0.9169, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0005992284933148571, | |
| "grad_norm": 0.644906222820282, | |
| "learning_rate": 4e-05, | |
| "loss": 0.9282, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0007490356166435714, | |
| "grad_norm": 0.49212518334388733, | |
| "learning_rate": 5e-05, | |
| "loss": 0.9798, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0008988427399722857, | |
| "grad_norm": 0.44296741485595703, | |
| "learning_rate": 6e-05, | |
| "loss": 1.0082, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.001048649863301, | |
| "grad_norm": 0.3797648847103119, | |
| "learning_rate": 7e-05, | |
| "loss": 0.9093, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.0011984569866297142, | |
| "grad_norm": 0.3763546049594879, | |
| "learning_rate": 8e-05, | |
| "loss": 1.0401, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0013482641099584285, | |
| "grad_norm": 0.3808014988899231, | |
| "learning_rate": 9e-05, | |
| "loss": 1.0106, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0014980712332871428, | |
| "grad_norm": 0.3745298385620117, | |
| "learning_rate": 0.0001, | |
| "loss": 0.9575, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.001647878356615857, | |
| "grad_norm": 0.39324429631233215, | |
| "learning_rate": 9.999316524962345e-05, | |
| "loss": 0.9435, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.0017976854799445713, | |
| "grad_norm": 0.3526301085948944, | |
| "learning_rate": 9.997266286704631e-05, | |
| "loss": 0.8208, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0019474926032732856, | |
| "grad_norm": 0.37045592069625854, | |
| "learning_rate": 9.993849845741524e-05, | |
| "loss": 0.9634, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.002097299726602, | |
| "grad_norm": 0.33772504329681396, | |
| "learning_rate": 9.989068136093873e-05, | |
| "loss": 0.8449, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.0022471068499307144, | |
| "grad_norm": 0.43836352229118347, | |
| "learning_rate": 9.98292246503335e-05, | |
| "loss": 0.6716, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.0023969139732594284, | |
| "grad_norm": 0.3777485191822052, | |
| "learning_rate": 9.975414512725057e-05, | |
| "loss": 0.8852, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.002546721096588143, | |
| "grad_norm": 0.36619219183921814, | |
| "learning_rate": 9.966546331768191e-05, | |
| "loss": 0.8873, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.002696528219916857, | |
| "grad_norm": 0.3267233073711395, | |
| "learning_rate": 9.956320346634876e-05, | |
| "loss": 0.855, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0028463353432455715, | |
| "grad_norm": 0.32704541087150574, | |
| "learning_rate": 9.944739353007344e-05, | |
| "loss": 0.8803, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.0029961424665742855, | |
| "grad_norm": 0.4222426414489746, | |
| "learning_rate": 9.931806517013612e-05, | |
| "loss": 0.8979, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.003145949589903, | |
| "grad_norm": 0.33164480328559875, | |
| "learning_rate": 9.917525374361912e-05, | |
| "loss": 0.7718, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.003295756713231714, | |
| "grad_norm": 0.32442235946655273, | |
| "learning_rate": 9.901899829374047e-05, | |
| "loss": 0.6811, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.0034455638365604286, | |
| "grad_norm": 0.3956811726093292, | |
| "learning_rate": 9.884934153917997e-05, | |
| "loss": 0.8274, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.0035953709598891426, | |
| "grad_norm": 0.4070234000682831, | |
| "learning_rate": 9.86663298624003e-05, | |
| "loss": 0.8034, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.003745178083217857, | |
| "grad_norm": 0.30631351470947266, | |
| "learning_rate": 9.847001329696653e-05, | |
| "loss": 0.7311, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.003894985206546571, | |
| "grad_norm": 0.34183362126350403, | |
| "learning_rate": 9.826044551386744e-05, | |
| "loss": 0.8869, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.004044792329875285, | |
| "grad_norm": 0.29005616903305054, | |
| "learning_rate": 9.803768380684242e-05, | |
| "loss": 0.8703, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.004194599453204, | |
| "grad_norm": 0.359518826007843, | |
| "learning_rate": 9.780178907671789e-05, | |
| "loss": 0.8542, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.004344406576532714, | |
| "grad_norm": 0.35285744071006775, | |
| "learning_rate": 9.755282581475769e-05, | |
| "loss": 0.7862, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.004494213699861429, | |
| "grad_norm": 0.32191500067710876, | |
| "learning_rate": 9.729086208503174e-05, | |
| "loss": 0.7777, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.004644020823190142, | |
| "grad_norm": 0.34467917680740356, | |
| "learning_rate": 9.701596950580806e-05, | |
| "loss": 0.8035, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.004793827946518857, | |
| "grad_norm": 0.31413033604621887, | |
| "learning_rate": 9.672822322997305e-05, | |
| "loss": 0.8019, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.004943635069847571, | |
| "grad_norm": 0.33082014322280884, | |
| "learning_rate": 9.642770192448536e-05, | |
| "loss": 0.7154, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.005093442193176286, | |
| "grad_norm": 0.34413284063339233, | |
| "learning_rate": 9.611448774886924e-05, | |
| "loss": 0.6867, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.0052432493165049995, | |
| "grad_norm": 0.3844047486782074, | |
| "learning_rate": 9.578866633275288e-05, | |
| "loss": 0.8222, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.005393056439833714, | |
| "grad_norm": 0.47779640555381775, | |
| "learning_rate": 9.545032675245813e-05, | |
| "loss": 0.8534, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.0055428635631624285, | |
| "grad_norm": 2.2068326473236084, | |
| "learning_rate": 9.509956150664796e-05, | |
| "loss": 0.8379, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.005692670686491143, | |
| "grad_norm": 0.41778677701950073, | |
| "learning_rate": 9.473646649103818e-05, | |
| "loss": 0.9006, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.005842477809819857, | |
| "grad_norm": 0.4241478145122528, | |
| "learning_rate": 9.43611409721806e-05, | |
| "loss": 0.9492, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.005992284933148571, | |
| "grad_norm": 0.4319832921028137, | |
| "learning_rate": 9.397368756032445e-05, | |
| "loss": 1.0443, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0061420920564772856, | |
| "grad_norm": 0.36160099506378174, | |
| "learning_rate": 9.357421218136386e-05, | |
| "loss": 0.6914, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.006291899179806, | |
| "grad_norm": 0.4357793629169464, | |
| "learning_rate": 9.316282404787871e-05, | |
| "loss": 0.872, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.006441706303134714, | |
| "grad_norm": 0.4501705467700958, | |
| "learning_rate": 9.273963562927695e-05, | |
| "loss": 0.8288, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.006591513426463428, | |
| "grad_norm": 0.5566861033439636, | |
| "learning_rate": 9.230476262104677e-05, | |
| "loss": 0.9786, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.006741320549792143, | |
| "grad_norm": 0.5371454954147339, | |
| "learning_rate": 9.185832391312644e-05, | |
| "loss": 0.9145, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.006891127673120857, | |
| "grad_norm": 0.5671194791793823, | |
| "learning_rate": 9.140044155740101e-05, | |
| "loss": 0.9264, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.007040934796449571, | |
| "grad_norm": 0.6908413171768188, | |
| "learning_rate": 9.093124073433463e-05, | |
| "loss": 1.0477, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.007190741919778285, | |
| "grad_norm": 0.7245323061943054, | |
| "learning_rate": 9.045084971874738e-05, | |
| "loss": 0.9384, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.007340549043107, | |
| "grad_norm": 0.9785915613174438, | |
| "learning_rate": 8.995939984474624e-05, | |
| "loss": 1.132, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.007490356166435714, | |
| "grad_norm": 1.2804609537124634, | |
| "learning_rate": 8.945702546981969e-05, | |
| "loss": 1.0875, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.007490356166435714, | |
| "eval_loss": 0.8688188791275024, | |
| "eval_runtime": 568.3984, | |
| "eval_samples_per_second": 19.78, | |
| "eval_steps_per_second": 4.945, | |
| "step": 50 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 200, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.058823463960576e+16, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |