| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.00011343012704174228, | |
| "eval_steps": 2, | |
| "global_step": 10, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.1343012704174228e-05, | |
| "grad_norm": 1.3943215608596802, | |
| "learning_rate": 2e-05, | |
| "loss": 3.1809, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 1.1343012704174228e-05, | |
| "eval_loss": 3.224539041519165, | |
| "eval_runtime": 873.4294, | |
| "eval_samples_per_second": 42.499, | |
| "eval_steps_per_second": 21.25, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 2.2686025408348456e-05, | |
| "grad_norm": 1.2974414825439453, | |
| "learning_rate": 4e-05, | |
| "loss": 3.0873, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 2.2686025408348456e-05, | |
| "eval_loss": 3.22213077545166, | |
| "eval_runtime": 873.1974, | |
| "eval_samples_per_second": 42.51, | |
| "eval_steps_per_second": 21.255, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 3.402903811252269e-05, | |
| "grad_norm": 1.3224360942840576, | |
| "learning_rate": 6e-05, | |
| "loss": 3.0156, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 4.537205081669691e-05, | |
| "grad_norm": 1.4221491813659668, | |
| "learning_rate": 8e-05, | |
| "loss": 3.1623, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 4.537205081669691e-05, | |
| "eval_loss": 3.17517352104187, | |
| "eval_runtime": 873.0352, | |
| "eval_samples_per_second": 42.518, | |
| "eval_steps_per_second": 21.259, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 5.671506352087114e-05, | |
| "grad_norm": 1.588068962097168, | |
| "learning_rate": 0.0001, | |
| "loss": 3.1715, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 6.805807622504538e-05, | |
| "grad_norm": 1.5034862756729126, | |
| "learning_rate": 9.755282581475769e-05, | |
| "loss": 2.9514, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 6.805807622504538e-05, | |
| "eval_loss": 2.9851348400115967, | |
| "eval_runtime": 873.7444, | |
| "eval_samples_per_second": 42.484, | |
| "eval_steps_per_second": 21.242, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 7.94010889292196e-05, | |
| "grad_norm": 1.6210209131240845, | |
| "learning_rate": 9.045084971874738e-05, | |
| "loss": 2.9009, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 9.074410163339382e-05, | |
| "grad_norm": 1.660132884979248, | |
| "learning_rate": 7.938926261462366e-05, | |
| "loss": 2.7984, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 9.074410163339382e-05, | |
| "eval_loss": 2.79468035697937, | |
| "eval_runtime": 873.5775, | |
| "eval_samples_per_second": 42.492, | |
| "eval_steps_per_second": 21.246, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.00010208711433756806, | |
| "grad_norm": 1.53206205368042, | |
| "learning_rate": 6.545084971874738e-05, | |
| "loss": 2.7431, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.00011343012704174228, | |
| "grad_norm": 1.5387572050094604, | |
| "learning_rate": 5e-05, | |
| "loss": 2.6737, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.00011343012704174228, | |
| "eval_loss": 2.7065184116363525, | |
| "eval_runtime": 873.6742, | |
| "eval_samples_per_second": 42.487, | |
| "eval_steps_per_second": 21.244, | |
| "step": 10 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 15, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 5, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 481091030876160.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |