{ "best_metric": 2.6829404830932617, "best_model_checkpoint": "miner_id_24/checkpoint-30", "epoch": 0.0019504900606277327, "eval_steps": 5, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.501633535425775e-05, "eval_loss": 4.043483734130859, "eval_runtime": 359.3057, "eval_samples_per_second": 18.026, "eval_steps_per_second": 9.015, "step": 1 }, { "epoch": 0.00019504900606277329, "grad_norm": 4.626938343048096, "learning_rate": 6e-05, "loss": 3.9159, "step": 3 }, { "epoch": 0.00032508167677128876, "eval_loss": 3.4318971633911133, "eval_runtime": 361.5469, "eval_samples_per_second": 17.915, "eval_steps_per_second": 8.959, "step": 5 }, { "epoch": 0.00039009801212554657, "grad_norm": 3.844383478164673, "learning_rate": 0.00012, "loss": 3.6589, "step": 6 }, { "epoch": 0.0005851470181883198, "grad_norm": 3.1320598125457764, "learning_rate": 0.00018, "loss": 2.9485, "step": 9 }, { "epoch": 0.0006501633535425775, "eval_loss": 2.923651695251465, "eval_runtime": 360.2273, "eval_samples_per_second": 17.98, "eval_steps_per_second": 8.992, "step": 10 }, { "epoch": 0.0007801960242510931, "grad_norm": 3.082592725753784, "learning_rate": 0.00019510565162951537, "loss": 3.1049, "step": 12 }, { "epoch": 0.0009752450303138663, "grad_norm": 1.2514885663986206, "learning_rate": 0.00017071067811865476, "loss": 2.7436, "step": 15 }, { "epoch": 0.0009752450303138663, "eval_loss": 2.748525381088257, "eval_runtime": 360.1464, "eval_samples_per_second": 17.984, "eval_steps_per_second": 8.994, "step": 15 }, { "epoch": 0.0011702940363766397, "grad_norm": 1.15325129032135, "learning_rate": 0.00013090169943749476, "loss": 2.6917, "step": 18 }, { "epoch": 0.001300326707085155, "eval_loss": 2.705315351486206, "eval_runtime": 359.9377, "eval_samples_per_second": 17.995, "eval_steps_per_second": 8.999, "step": 20 }, { "epoch": 0.0013653430424394129, "grad_norm": 1.1571309566497803, "learning_rate": 8.435655349597689e-05, "loss": 2.6742, "step": 21 }, { "epoch": 0.0015603920485021863, "grad_norm": 1.0296660661697388, "learning_rate": 4.12214747707527e-05, "loss": 2.6416, "step": 24 }, { "epoch": 0.0016254083838564439, "eval_loss": 2.6872406005859375, "eval_runtime": 359.9275, "eval_samples_per_second": 17.995, "eval_steps_per_second": 8.999, "step": 25 }, { "epoch": 0.0017554410545649595, "grad_norm": 0.9758339524269104, "learning_rate": 1.0899347581163221e-05, "loss": 2.6216, "step": 27 }, { "epoch": 0.0019504900606277327, "grad_norm": 1.0820367336273193, "learning_rate": 0.0, "loss": 2.7111, "step": 30 }, { "epoch": 0.0019504900606277327, "eval_loss": 2.6829404830932617, "eval_runtime": 360.0892, "eval_samples_per_second": 17.987, "eval_steps_per_second": 8.995, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2951326036131840.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }