{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 228, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13157894736842105, "grad_norm": 8.477547407637145, "learning_rate": 7.82608695652174e-06, "loss": 1.5102, "step": 10 }, { "epoch": 0.2631578947368421, "grad_norm": 2.7741164985922446, "learning_rate": 1.6521739130434785e-05, "loss": 0.8919, "step": 20 }, { "epoch": 0.39473684210526316, "grad_norm": 2.056306394468331, "learning_rate": 1.9957756633258264e-05, "loss": 0.8216, "step": 30 }, { "epoch": 0.5263157894736842, "grad_norm": 2.146307741121485, "learning_rate": 1.9700894071491736e-05, "loss": 0.8108, "step": 40 }, { "epoch": 0.6578947368421053, "grad_norm": 2.066709337428477, "learning_rate": 1.9216650774785975e-05, "loss": 0.7918, "step": 50 }, { "epoch": 0.7894736842105263, "grad_norm": 3.4358972900426865, "learning_rate": 1.8516376995590185e-05, "loss": 0.7514, "step": 60 }, { "epoch": 0.9210526315789473, "grad_norm": 2.106209316347196, "learning_rate": 1.7616486557764187e-05, "loss": 0.7833, "step": 70 }, { "epoch": 1.0526315789473684, "grad_norm": 1.4631406623409116, "learning_rate": 1.6538072130458853e-05, "loss": 0.7056, "step": 80 }, { "epoch": 1.1842105263157894, "grad_norm": 1.7317009771124054, "learning_rate": 1.5306410833828534e-05, "loss": 0.5845, "step": 90 }, { "epoch": 1.3157894736842106, "grad_norm": 1.6663659948073513, "learning_rate": 1.3950371764758543e-05, "loss": 0.603, "step": 100 }, { "epoch": 1.4473684210526316, "grad_norm": 1.5835467362397797, "learning_rate": 1.2501739329702453e-05, "loss": 0.5848, "step": 110 }, { "epoch": 1.5789473684210527, "grad_norm": 1.5087640437736358, "learning_rate": 1.0994468245134071e-05, "loss": 0.5757, "step": 120 }, { "epoch": 1.7105263157894737, "grad_norm": 1.6010692904865604, "learning_rate": 9.463887667771946e-06, "loss": 0.5573, "step": 130 }, { "epoch": 1.8421052631578947, "grad_norm": 1.518477136944885, "learning_rate": 7.945873109089503e-06, "loss": 0.5575, "step": 140 }, { "epoch": 1.973684210526316, "grad_norm": 1.410474762957939, "learning_rate": 6.476005543732783e-06, "loss": 0.5708, "step": 150 }, { "epoch": 2.1052631578947367, "grad_norm": 1.797519892522943, "learning_rate": 5.088737421631767e-06, "loss": 0.4269, "step": 160 }, { "epoch": 2.236842105263158, "grad_norm": 1.413640789584412, "learning_rate": 3.8165851317745705e-06, "loss": 0.3803, "step": 170 }, { "epoch": 2.3684210526315788, "grad_norm": 1.9426892837507217, "learning_rate": 2.6893668456091627e-06, "loss": 0.3743, "step": 180 }, { "epoch": 2.5, "grad_norm": 1.568653298779716, "learning_rate": 1.7335036043773402e-06, "loss": 0.3636, "step": 190 }, { "epoch": 2.6315789473684212, "grad_norm": 1.7450539491612516, "learning_rate": 9.714000323014705e-07, "loss": 0.3721, "step": 200 }, { "epoch": 2.763157894736842, "grad_norm": 1.7361396176890889, "learning_rate": 4.209191911819688e-07, "loss": 0.3777, "step": 210 }, { "epoch": 2.8947368421052633, "grad_norm": 1.5748925163724883, "learning_rate": 9.496388536875623e-08, "loss": 0.3594, "step": 220 } ], "logging_steps": 10, "max_steps": 228, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 14407662108672.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }