{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.016333197223356473, "eval_steps": 5, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005444399074452157, "eval_loss": 7.361711502075195, "eval_runtime": 5.2901, "eval_samples_per_second": 146.311, "eval_steps_per_second": 73.155, "step": 1 }, { "epoch": 0.0016333197223356473, "grad_norm": 3018.555419921875, "learning_rate": 4e-05, "loss": 29.4527, "step": 3 }, { "epoch": 0.0027221995372260785, "eval_loss": 7.326521396636963, "eval_runtime": 4.8792, "eval_samples_per_second": 158.633, "eval_steps_per_second": 79.317, "step": 5 }, { "epoch": 0.0032666394446712946, "grad_norm": 2035.9393310546875, "learning_rate": 8e-05, "loss": 29.6231, "step": 6 }, { "epoch": 0.004899959167006942, "grad_norm": 2877.90185546875, "learning_rate": 0.00012, "loss": 29.1023, "step": 9 }, { "epoch": 0.005444399074452157, "eval_loss": 7.256721019744873, "eval_runtime": 4.8765, "eval_samples_per_second": 158.721, "eval_steps_per_second": 79.361, "step": 10 }, { "epoch": 0.006533278889342589, "grad_norm": 2655.123291015625, "learning_rate": 0.00016, "loss": 28.3913, "step": 12 }, { "epoch": 0.008166598611678236, "grad_norm": 2249.07373046875, "learning_rate": 0.0002, "loss": 28.355, "step": 15 }, { "epoch": 0.008166598611678236, "eval_loss": 7.156546592712402, "eval_runtime": 4.8891, "eval_samples_per_second": 158.31, "eval_steps_per_second": 79.155, "step": 15 }, { "epoch": 0.009799918334013884, "grad_norm": 2144.675048828125, "learning_rate": 0.00018090169943749476, "loss": 29.4269, "step": 18 }, { "epoch": 0.010888798148904314, "eval_loss": 7.053538799285889, "eval_runtime": 4.9311, "eval_samples_per_second": 156.964, "eval_steps_per_second": 78.482, "step": 20 }, { "epoch": 0.011433238056349531, "grad_norm": 2027.64697265625, "learning_rate": 0.00013090169943749476, "loss": 28.5078, "step": 21 }, { "epoch": 0.013066557778685178, "grad_norm": 1969.8271484375, "learning_rate": 6.909830056250527e-05, "loss": 27.8338, "step": 24 }, { "epoch": 0.013610997686130393, "eval_loss": 7.016091823577881, "eval_runtime": 4.8933, "eval_samples_per_second": 158.175, "eval_steps_per_second": 79.087, "step": 25 }, { "epoch": 0.014699877501020826, "grad_norm": 2088.4873046875, "learning_rate": 1.9098300562505266e-05, "loss": 28.0861, "step": 27 }, { "epoch": 0.016333197223356473, "grad_norm": 2591.714599609375, "learning_rate": 0.0, "loss": 27.7559, "step": 30 }, { "epoch": 0.016333197223356473, "eval_loss": 7.001165866851807, "eval_runtime": 4.879, "eval_samples_per_second": 158.639, "eval_steps_per_second": 79.319, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 15, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 12533486321664.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }