{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.02615832325147958, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001307916162573979, "eval_loss": 3.9479501247406006, "eval_runtime": 37.5868, "eval_samples_per_second": 85.668, "eval_steps_per_second": 42.834, "step": 1 }, { "epoch": 0.001307916162573979, "grad_norm": 6.595814228057861, "learning_rate": 0.0002, "loss": 15.3333, "step": 10 }, { "epoch": 0.002615832325147958, "grad_norm": 6.2806596755981445, "learning_rate": 0.0002, "loss": 14.378, "step": 20 }, { "epoch": 0.003923748487721937, "grad_norm": 7.519875526428223, "learning_rate": 0.0002, "loss": 13.7137, "step": 30 }, { "epoch": 0.005231664650295916, "grad_norm": 7.988043785095215, "learning_rate": 0.0002, "loss": 13.6446, "step": 40 }, { "epoch": 0.006539580812869895, "grad_norm": 7.619246959686279, "learning_rate": 0.0002, "loss": 12.593, "step": 50 }, { "epoch": 0.006539580812869895, "eval_loss": 3.214905023574829, "eval_runtime": 37.5958, "eval_samples_per_second": 85.648, "eval_steps_per_second": 42.824, "step": 50 }, { "epoch": 0.007847496975443874, "grad_norm": 18.687952041625977, "learning_rate": 0.0002, "loss": 12.9873, "step": 60 }, { "epoch": 0.009155413138017853, "grad_norm": 8.790769577026367, "learning_rate": 0.0002, "loss": 12.423, "step": 70 }, { "epoch": 0.010463329300591831, "grad_norm": 8.85031509399414, "learning_rate": 0.0002, "loss": 12.0676, "step": 80 }, { "epoch": 0.011771245463165811, "grad_norm": 12.773844718933105, "learning_rate": 0.0002, "loss": 12.8019, "step": 90 }, { "epoch": 0.01307916162573979, "grad_norm": 11.019274711608887, "learning_rate": 0.0002, "loss": 12.2702, "step": 100 }, { "epoch": 0.01307916162573979, "eval_loss": 3.001326560974121, "eval_runtime": 37.3516, "eval_samples_per_second": 86.208, "eval_steps_per_second": 43.104, "step": 100 }, { "epoch": 0.014387077788313768, "grad_norm": 12.210068702697754, "learning_rate": 0.0002, "loss": 12.0112, "step": 110 }, { "epoch": 0.015694993950887748, "grad_norm": 11.919943809509277, "learning_rate": 0.0002, "loss": 11.319, "step": 120 }, { "epoch": 0.017002910113461726, "grad_norm": 10.63607120513916, "learning_rate": 0.0002, "loss": 11.7438, "step": 130 }, { "epoch": 0.018310826276035707, "grad_norm": 11.607667922973633, "learning_rate": 0.0002, "loss": 11.2511, "step": 140 }, { "epoch": 0.019618742438609685, "grad_norm": 13.221806526184082, "learning_rate": 0.0002, "loss": 11.3896, "step": 150 }, { "epoch": 0.019618742438609685, "eval_loss": 2.847592353820801, "eval_runtime": 37.3438, "eval_samples_per_second": 86.226, "eval_steps_per_second": 43.113, "step": 150 }, { "epoch": 0.020926658601183663, "grad_norm": 12.179810523986816, "learning_rate": 0.0002, "loss": 11.7368, "step": 160 }, { "epoch": 0.022234574763757644, "grad_norm": 14.598481178283691, "learning_rate": 0.0002, "loss": 11.1283, "step": 170 }, { "epoch": 0.023542490926331622, "grad_norm": 9.615200996398926, "learning_rate": 0.0002, "loss": 11.2235, "step": 180 }, { "epoch": 0.0248504070889056, "grad_norm": 10.755783081054688, "learning_rate": 0.0002, "loss": 11.2189, "step": 190 }, { "epoch": 0.02615832325147958, "grad_norm": 12.484338760375977, "learning_rate": 0.0002, "loss": 11.5377, "step": 200 }, { "epoch": 0.02615832325147958, "eval_loss": 2.7612085342407227, "eval_runtime": 37.0673, "eval_samples_per_second": 86.869, "eval_steps_per_second": 43.435, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1546434851635200.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }