{ "best_metric": 0.816862165927887, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 0.008989976176563132, "eval_steps": 100, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.989976176563132e-05, "eval_loss": 1.525974988937378, "eval_runtime": 653.7709, "eval_samples_per_second": 7.165, "eval_steps_per_second": 3.582, "step": 1 }, { "epoch": 0.0008989976176563133, "grad_norm": 27.566255569458008, "learning_rate": 2.5e-06, "loss": 5.9581, "step": 10 }, { "epoch": 0.0017979952353126265, "grad_norm": 23.980079650878906, "learning_rate": 5e-06, "loss": 5.7295, "step": 20 }, { "epoch": 0.0026969928529689395, "grad_norm": 22.77297592163086, "learning_rate": 7.5e-06, "loss": 4.7677, "step": 30 }, { "epoch": 0.003595990470625253, "grad_norm": 24.030942916870117, "learning_rate": 1e-05, "loss": 4.3247, "step": 40 }, { "epoch": 0.004494988088281566, "grad_norm": 18.87146759033203, "learning_rate": 1.25e-05, "loss": 3.8906, "step": 50 }, { "epoch": 0.005393985705937879, "grad_norm": 20.442569732666016, "learning_rate": 1.5e-05, "loss": 3.6275, "step": 60 }, { "epoch": 0.0062929833235941925, "grad_norm": 18.778696060180664, "learning_rate": 1.75e-05, "loss": 3.5476, "step": 70 }, { "epoch": 0.007191980941250506, "grad_norm": 17.43655776977539, "learning_rate": 2e-05, "loss": 3.4289, "step": 80 }, { "epoch": 0.008090978558906819, "grad_norm": 21.041603088378906, "learning_rate": 2.25e-05, "loss": 3.3832, "step": 90 }, { "epoch": 0.008989976176563132, "grad_norm": 18.440113067626953, "learning_rate": 2.5e-05, "loss": 3.3127, "step": 100 }, { "epoch": 0.008989976176563132, "eval_loss": 0.816862165927887, "eval_runtime": 653.3249, "eval_samples_per_second": 7.169, "eval_steps_per_second": 3.585, "step": 100 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.066805854470144e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }