{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9361702127659575, "eval_steps": 25, "global_step": 105, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.28368794326241137, "grad_norm": 0.26462894678115845, "learning_rate": 0.00018181818181818183, "loss": 0.9378, "step": 10 }, { "epoch": 0.5673758865248227, "grad_norm": 0.14169304072856903, "learning_rate": 0.00019551024972069126, "loss": 0.8496, "step": 20 }, { "epoch": 0.7092198581560284, "eval_loss": 0.8489587306976318, "eval_runtime": 26.6774, "eval_samples_per_second": 18.742, "eval_steps_per_second": 1.2, "step": 25 }, { "epoch": 0.851063829787234, "grad_norm": 0.127975732088089, "learning_rate": 0.0001805070053127563, "loss": 0.861, "step": 30 }, { "epoch": 1.1134751773049645, "grad_norm": 0.15211418271064758, "learning_rate": 0.00015659470943305955, "loss": 0.8392, "step": 40 }, { "epoch": 1.397163120567376, "grad_norm": 0.1456218957901001, "learning_rate": 0.00012641954018712863, "loss": 0.784, "step": 50 }, { "epoch": 1.397163120567376, "eval_loss": 0.818655252456665, "eval_runtime": 26.6767, "eval_samples_per_second": 18.743, "eval_steps_per_second": 1.2, "step": 50 }, { "epoch": 1.6808510638297873, "grad_norm": 0.14745110273361206, "learning_rate": 9.332073662548784e-05, "loss": 0.7885, "step": 60 }, { "epoch": 1.9645390070921986, "grad_norm": 0.15153422951698303, "learning_rate": 6.0961072483650526e-05, "loss": 0.7773, "step": 70 }, { "epoch": 2.0851063829787235, "eval_loss": 0.8111562132835388, "eval_runtime": 26.6835, "eval_samples_per_second": 18.738, "eval_steps_per_second": 1.199, "step": 75 }, { "epoch": 2.226950354609929, "grad_norm": 0.1788651943206787, "learning_rate": 3.292152698607768e-05, "loss": 0.7987, "step": 80 }, { "epoch": 2.5106382978723403, "grad_norm": 0.15647704899311066, "learning_rate": 1.230500717933285e-05, "loss": 0.74, "step": 90 }, { "epoch": 2.794326241134752, "grad_norm": 0.1778271496295929, "learning_rate": 1.3929746009971433e-06, "loss": 0.754, "step": 100 }, { "epoch": 2.794326241134752, "eval_loss": 0.8111380338668823, "eval_runtime": 26.682, "eval_samples_per_second": 18.739, "eval_steps_per_second": 1.199, "step": 100 }, { "epoch": 2.9361702127659575, "step": 105, "total_flos": 8.675771489095516e+17, "train_loss": 0.8103648549034482, "train_runtime": 2078.8921, "train_samples_per_second": 6.494, "train_steps_per_second": 0.051 } ], "logging_steps": 10, "max_steps": 105, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.675771489095516e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }