{
  "best_metric": 0.6033799052238464,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.7782101167315175,
  "eval_steps": 50,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01556420233463035,
      "grad_norm": 9.139370918273926,
      "learning_rate": 1e-05,
      "loss": 0.8535,
      "step": 1
    },
    {
      "epoch": 0.01556420233463035,
      "eval_loss": 0.888887882232666,
      "eval_runtime": 4.8903,
      "eval_samples_per_second": 22.085,
      "eval_steps_per_second": 5.521,
      "step": 1
    },
    {
      "epoch": 0.0311284046692607,
      "grad_norm": 9.370291709899902,
      "learning_rate": 2e-05,
      "loss": 0.9253,
      "step": 2
    },
    {
      "epoch": 0.04669260700389105,
      "grad_norm": 7.656525611877441,
      "learning_rate": 3e-05,
      "loss": 0.9002,
      "step": 3
    },
    {
      "epoch": 0.0622568093385214,
      "grad_norm": 3.6497480869293213,
      "learning_rate": 4e-05,
      "loss": 0.8366,
      "step": 4
    },
    {
      "epoch": 0.07782101167315175,
      "grad_norm": 0.8219636082649231,
      "learning_rate": 5e-05,
      "loss": 0.7402,
      "step": 5
    },
    {
      "epoch": 0.0933852140077821,
      "grad_norm": 0.8171916604042053,
      "learning_rate": 6e-05,
      "loss": 0.7317,
      "step": 6
    },
    {
      "epoch": 0.10894941634241245,
      "grad_norm": 0.7215791940689087,
      "learning_rate": 7e-05,
      "loss": 0.7295,
      "step": 7
    },
    {
      "epoch": 0.1245136186770428,
      "grad_norm": 0.6683509945869446,
      "learning_rate": 8e-05,
      "loss": 0.6973,
      "step": 8
    },
    {
      "epoch": 0.14007782101167315,
      "grad_norm": 0.6624220609664917,
      "learning_rate": 9e-05,
      "loss": 0.7134,
      "step": 9
    },
    {
      "epoch": 0.1556420233463035,
      "grad_norm": 0.7774540781974792,
      "learning_rate": 0.0001,
      "loss": 0.6417,
      "step": 10
    },
    {
      "epoch": 0.17120622568093385,
      "grad_norm": 0.6136654019355774,
      "learning_rate": 9.999263238525136e-05,
      "loss": 0.6314,
      "step": 11
    },
    {
      "epoch": 0.1867704280155642,
      "grad_norm": 0.5942745208740234,
      "learning_rate": 9.997053171227526e-05,
      "loss": 0.6433,
      "step": 12
    },
    {
      "epoch": 0.20233463035019456,
      "grad_norm": 0.7222641706466675,
      "learning_rate": 9.993370449424153e-05,
      "loss": 0.6777,
      "step": 13
    },
    {
      "epoch": 0.2178988326848249,
      "grad_norm": 0.6611757278442383,
      "learning_rate": 9.988216158430033e-05,
      "loss": 0.6507,
      "step": 14
    },
    {
      "epoch": 0.23346303501945526,
      "grad_norm": 0.5460258722305298,
      "learning_rate": 9.981591817238378e-05,
      "loss": 0.625,
      "step": 15
    },
    {
      "epoch": 0.2490272373540856,
      "grad_norm": 0.5874778628349304,
      "learning_rate": 9.973499378072945e-05,
      "loss": 0.6796,
      "step": 16
    },
    {
      "epoch": 0.26459143968871596,
      "grad_norm": 0.42067644000053406,
      "learning_rate": 9.963941225812701e-05,
      "loss": 0.6118,
      "step": 17
    },
    {
      "epoch": 0.2801556420233463,
      "grad_norm": 0.4198527932167053,
      "learning_rate": 9.952920177288986e-05,
      "loss": 0.6576,
      "step": 18
    },
    {
      "epoch": 0.29571984435797666,
      "grad_norm": 0.45528435707092285,
      "learning_rate": 9.940439480455386e-05,
      "loss": 0.6441,
      "step": 19
    },
    {
      "epoch": 0.311284046692607,
      "grad_norm": 0.4378451406955719,
      "learning_rate": 9.926502813430545e-05,
      "loss": 0.5701,
      "step": 20
    },
    {
      "epoch": 0.32684824902723736,
      "grad_norm": 0.4284273386001587,
      "learning_rate": 9.911114283414205e-05,
      "loss": 0.5692,
      "step": 21
    },
    {
      "epoch": 0.3424124513618677,
      "grad_norm": 0.4425485134124756,
      "learning_rate": 9.89427842547679e-05,
      "loss": 0.6107,
      "step": 22
    },
    {
      "epoch": 0.35797665369649806,
      "grad_norm": 0.4803674519062042,
      "learning_rate": 9.876000201222912e-05,
      "loss": 0.6374,
      "step": 23
    },
    {
      "epoch": 0.3735408560311284,
      "grad_norm": 0.45156747102737427,
      "learning_rate": 9.856284997329158e-05,
      "loss": 0.6015,
      "step": 24
    },
    {
      "epoch": 0.38910505836575876,
      "grad_norm": 0.47944512963294983,
      "learning_rate": 9.835138623956603e-05,
      "loss": 0.6153,
      "step": 25
    },
    {
      "epoch": 0.4046692607003891,
      "grad_norm": 0.5218237638473511,
      "learning_rate": 9.812567313038542e-05,
      "loss": 0.6695,
      "step": 26
    },
    {
      "epoch": 0.42023346303501946,
      "grad_norm": 0.4772241711616516,
      "learning_rate": 9.788577716443902e-05,
      "loss": 0.6116,
      "step": 27
    },
    {
      "epoch": 0.4357976653696498,
      "grad_norm": 0.48415690660476685,
      "learning_rate": 9.763176904016913e-05,
      "loss": 0.6178,
      "step": 28
    },
    {
      "epoch": 0.45136186770428016,
      "grad_norm": 0.5117486119270325,
      "learning_rate": 9.736372361493584e-05,
      "loss": 0.6859,
      "step": 29
    },
    {
      "epoch": 0.4669260700389105,
      "grad_norm": 0.4883125126361847,
      "learning_rate": 9.708171988295631e-05,
      "loss": 0.6596,
      "step": 30
    },
    {
      "epoch": 0.48249027237354086,
      "grad_norm": 0.494489848613739,
      "learning_rate": 9.678584095202468e-05,
      "loss": 0.6489,
      "step": 31
    },
    {
      "epoch": 0.4980544747081712,
      "grad_norm": 0.5451928973197937,
      "learning_rate": 9.647617401902002e-05,
      "loss": 0.6672,
      "step": 32
    },
    {
      "epoch": 0.5136186770428015,
      "grad_norm": 0.36875829100608826,
      "learning_rate": 9.61528103442088e-05,
      "loss": 0.5941,
      "step": 33
    },
    {
      "epoch": 0.5291828793774319,
      "grad_norm": 0.39669832587242126,
      "learning_rate": 9.581584522435024e-05,
      "loss": 0.6472,
      "step": 34
    },
    {
      "epoch": 0.5447470817120622,
      "grad_norm": 0.37171563506126404,
      "learning_rate": 9.546537796461179e-05,
      "loss": 0.5654,
      "step": 35
    },
    {
      "epoch": 0.5603112840466926,
      "grad_norm": 0.3935498893260956,
      "learning_rate": 9.510151184930354e-05,
      "loss": 0.6021,
      "step": 36
    },
    {
      "epoch": 0.5758754863813229,
      "grad_norm": 0.43762218952178955,
      "learning_rate": 9.472435411143978e-05,
      "loss": 0.6841,
      "step": 37
    },
    {
      "epoch": 0.5914396887159533,
      "grad_norm": 0.42783230543136597,
      "learning_rate": 9.433401590113701e-05,
      "loss": 0.6359,
      "step": 38
    },
    {
      "epoch": 0.6070038910505836,
      "grad_norm": 0.4376448094844818,
      "learning_rate": 9.393061225285743e-05,
      "loss": 0.656,
      "step": 39
    },
    {
      "epoch": 0.622568093385214,
      "grad_norm": 0.40691596269607544,
      "learning_rate": 9.351426205150777e-05,
      "loss": 0.6235,
      "step": 40
    },
    {
      "epoch": 0.6381322957198443,
      "grad_norm": 0.45587652921676636,
      "learning_rate": 9.308508799740341e-05,
      "loss": 0.6901,
      "step": 41
    },
    {
      "epoch": 0.6536964980544747,
      "grad_norm": 0.4364193379878998,
      "learning_rate": 9.2643216570108e-05,
      "loss": 0.6447,
      "step": 42
    },
    {
      "epoch": 0.669260700389105,
      "grad_norm": 0.4265640079975128,
      "learning_rate": 9.218877799115928e-05,
      "loss": 0.6589,
      "step": 43
    },
    {
      "epoch": 0.6848249027237354,
      "grad_norm": 0.4682093560695648,
      "learning_rate": 9.172190618569236e-05,
      "loss": 0.6511,
      "step": 44
    },
    {
      "epoch": 0.7003891050583657,
      "grad_norm": 0.43623587489128113,
      "learning_rate": 9.124273874297122e-05,
      "loss": 0.5936,
      "step": 45
    },
    {
      "epoch": 0.7159533073929961,
      "grad_norm": 0.48344677686691284,
      "learning_rate": 9.075141687584057e-05,
      "loss": 0.6775,
      "step": 46
    },
    {
      "epoch": 0.7315175097276264,
      "grad_norm": 0.4627096652984619,
      "learning_rate": 9.024808537910981e-05,
      "loss": 0.5983,
      "step": 47
    },
    {
      "epoch": 0.7470817120622568,
      "grad_norm": 0.49207404255867004,
      "learning_rate": 8.973289258688125e-05,
      "loss": 0.6198,
      "step": 48
    },
    {
      "epoch": 0.7626459143968871,
      "grad_norm": 0.3565536439418793,
      "learning_rate": 8.920599032883554e-05,
      "loss": 0.6415,
      "step": 49
    },
    {
      "epoch": 0.7782101167315175,
      "grad_norm": 0.37106260657310486,
      "learning_rate": 8.86675338854865e-05,
      "loss": 0.6197,
      "step": 50
    },
    {
      "epoch": 0.7782101167315175,
      "eval_loss": 0.6033799052238464,
      "eval_runtime": 5.08,
      "eval_samples_per_second": 21.26,
      "eval_steps_per_second": 5.315,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 193,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.481100047777792e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}