{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.22876751501286818, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001143837575064341, "eval_loss": 0.9352622032165527, "eval_runtime": 19.8223, "eval_samples_per_second": 18.615, "eval_steps_per_second": 9.333, "step": 1 }, { "epoch": 0.01143837575064341, "grad_norm": 1.0111253261566162, "learning_rate": 0.0002, "loss": 3.4475, "step": 10 }, { "epoch": 0.02287675150128682, "grad_norm": 1.0076441764831543, "learning_rate": 0.0002, "loss": 3.1352, "step": 20 }, { "epoch": 0.03431512725193023, "grad_norm": 1.113232135772705, "learning_rate": 0.0002, "loss": 2.8919, "step": 30 }, { "epoch": 0.04575350300257364, "grad_norm": 1.1204644441604614, "learning_rate": 0.0002, "loss": 2.9899, "step": 40 }, { "epoch": 0.057191878753217046, "grad_norm": 0.9739717841148376, "learning_rate": 0.0002, "loss": 2.9405, "step": 50 }, { "epoch": 0.057191878753217046, "eval_loss": 0.7362952828407288, "eval_runtime": 19.921, "eval_samples_per_second": 18.523, "eval_steps_per_second": 9.287, "step": 50 }, { "epoch": 0.06863025450386046, "grad_norm": 0.8366644978523254, "learning_rate": 0.0002, "loss": 2.9592, "step": 60 }, { "epoch": 0.08006863025450386, "grad_norm": 0.8385974764823914, "learning_rate": 0.0002, "loss": 2.9791, "step": 70 }, { "epoch": 0.09150700600514727, "grad_norm": 0.8647813200950623, "learning_rate": 0.0002, "loss": 2.8695, "step": 80 }, { "epoch": 0.10294538175579068, "grad_norm": 0.7855409383773804, "learning_rate": 0.0002, "loss": 3.2081, "step": 90 }, { "epoch": 0.11438375750643409, "grad_norm": 2.0227108001708984, "learning_rate": 0.0002, "loss": 2.9471, "step": 100 }, { "epoch": 0.11438375750643409, "eval_loss": 0.7189851999282837, "eval_runtime": 19.8343, "eval_samples_per_second": 18.604, "eval_steps_per_second": 9.327, "step": 100 }, { "epoch": 0.1258221332570775, "grad_norm": 0.8634002208709717, "learning_rate": 0.0002, "loss": 2.9229, "step": 110 }, { "epoch": 0.1372605090077209, "grad_norm": 0.8289279937744141, "learning_rate": 0.0002, "loss": 2.7623, "step": 120 }, { "epoch": 0.14869888475836432, "grad_norm": 0.8797029852867126, "learning_rate": 0.0002, "loss": 2.9304, "step": 130 }, { "epoch": 0.16013726050900773, "grad_norm": 1.5126310586929321, "learning_rate": 0.0002, "loss": 2.7309, "step": 140 }, { "epoch": 0.17157563625965114, "grad_norm": 0.9973982572555542, "learning_rate": 0.0002, "loss": 2.8402, "step": 150 }, { "epoch": 0.17157563625965114, "eval_loss": 0.7114607095718384, "eval_runtime": 19.8671, "eval_samples_per_second": 18.573, "eval_steps_per_second": 9.312, "step": 150 }, { "epoch": 0.18301401201029455, "grad_norm": 1.0085833072662354, "learning_rate": 0.0002, "loss": 2.9839, "step": 160 }, { "epoch": 0.19445238776093796, "grad_norm": 0.9592446088790894, "learning_rate": 0.0002, "loss": 2.7984, "step": 170 }, { "epoch": 0.20589076351158137, "grad_norm": 1.625331163406372, "learning_rate": 0.0002, "loss": 2.7971, "step": 180 }, { "epoch": 0.21732913926222477, "grad_norm": 1.4084880352020264, "learning_rate": 0.0002, "loss": 2.6103, "step": 190 }, { "epoch": 0.22876751501286818, "grad_norm": 0.64777672290802, "learning_rate": 0.0002, "loss": 3.0829, "step": 200 }, { "epoch": 0.22876751501286818, "eval_loss": 0.7066531181335449, "eval_runtime": 19.8714, "eval_samples_per_second": 18.569, "eval_steps_per_second": 9.31, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.605261891036774e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }