{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.35842293906810035,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0017921146953405018,
      "eval_loss": 3.68882155418396,
      "eval_runtime": 2.8556,
      "eval_samples_per_second": 82.293,
      "eval_steps_per_second": 41.322,
      "step": 1
    },
    {
      "epoch": 0.017921146953405017,
      "grad_norm": 4.795637607574463,
      "learning_rate": 0.0002,
      "loss": 14.3965,
      "step": 10
    },
    {
      "epoch": 0.035842293906810034,
      "grad_norm": 5.258082389831543,
      "learning_rate": 0.0002,
      "loss": 13.813,
      "step": 20
    },
    {
      "epoch": 0.053763440860215055,
      "grad_norm": 4.523959636688232,
      "learning_rate": 0.0002,
      "loss": 13.6853,
      "step": 30
    },
    {
      "epoch": 0.07168458781362007,
      "grad_norm": 4.891673564910889,
      "learning_rate": 0.0002,
      "loss": 13.4352,
      "step": 40
    },
    {
      "epoch": 0.08960573476702509,
      "grad_norm": 5.062295436859131,
      "learning_rate": 0.0002,
      "loss": 13.0299,
      "step": 50
    },
    {
      "epoch": 0.08960573476702509,
      "eval_loss": 3.3310461044311523,
      "eval_runtime": 2.9394,
      "eval_samples_per_second": 79.947,
      "eval_steps_per_second": 40.144,
      "step": 50
    },
    {
      "epoch": 0.10752688172043011,
      "grad_norm": 5.369076728820801,
      "learning_rate": 0.0002,
      "loss": 13.1517,
      "step": 60
    },
    {
      "epoch": 0.12544802867383512,
      "grad_norm": 5.329068183898926,
      "learning_rate": 0.0002,
      "loss": 13.13,
      "step": 70
    },
    {
      "epoch": 0.14336917562724014,
      "grad_norm": 5.421701431274414,
      "learning_rate": 0.0002,
      "loss": 13.0658,
      "step": 80
    },
    {
      "epoch": 0.16129032258064516,
      "grad_norm": 5.6926751136779785,
      "learning_rate": 0.0002,
      "loss": 13.0606,
      "step": 90
    },
    {
      "epoch": 0.17921146953405018,
      "grad_norm": 4.89196252822876,
      "learning_rate": 0.0002,
      "loss": 13.2831,
      "step": 100
    },
    {
      "epoch": 0.17921146953405018,
      "eval_loss": 3.270508289337158,
      "eval_runtime": 3.02,
      "eval_samples_per_second": 77.814,
      "eval_steps_per_second": 39.073,
      "step": 100
    },
    {
      "epoch": 0.1971326164874552,
      "grad_norm": 5.2668070793151855,
      "learning_rate": 0.0002,
      "loss": 13.1221,
      "step": 110
    },
    {
      "epoch": 0.21505376344086022,
      "grad_norm": 4.916939735412598,
      "learning_rate": 0.0002,
      "loss": 12.991,
      "step": 120
    },
    {
      "epoch": 0.23297491039426524,
      "grad_norm": 5.837742805480957,
      "learning_rate": 0.0002,
      "loss": 13.1578,
      "step": 130
    },
    {
      "epoch": 0.25089605734767023,
      "grad_norm": 5.586097240447998,
      "learning_rate": 0.0002,
      "loss": 12.6136,
      "step": 140
    },
    {
      "epoch": 0.26881720430107525,
      "grad_norm": 5.033875465393066,
      "learning_rate": 0.0002,
      "loss": 13.0117,
      "step": 150
    },
    {
      "epoch": 0.26881720430107525,
      "eval_loss": 3.2495903968811035,
      "eval_runtime": 2.9557,
      "eval_samples_per_second": 79.507,
      "eval_steps_per_second": 39.923,
      "step": 150
    },
    {
      "epoch": 0.2867383512544803,
      "grad_norm": 4.852773189544678,
      "learning_rate": 0.0002,
      "loss": 12.8406,
      "step": 160
    },
    {
      "epoch": 0.3046594982078853,
      "grad_norm": 5.590361595153809,
      "learning_rate": 0.0002,
      "loss": 12.821,
      "step": 170
    },
    {
      "epoch": 0.3225806451612903,
      "grad_norm": 6.373524188995361,
      "learning_rate": 0.0002,
      "loss": 12.7874,
      "step": 180
    },
    {
      "epoch": 0.34050179211469533,
      "grad_norm": 4.901456356048584,
      "learning_rate": 0.0002,
      "loss": 12.9965,
      "step": 190
    },
    {
      "epoch": 0.35842293906810035,
      "grad_norm": 5.205903053283691,
      "learning_rate": 0.0002,
      "loss": 12.7484,
      "step": 200
    },
    {
      "epoch": 0.35842293906810035,
      "eval_loss": 3.230069875717163,
      "eval_runtime": 3.0502,
      "eval_samples_per_second": 77.044,
      "eval_steps_per_second": 38.686,
      "step": 200
    }
  ],
  "logging_steps": 10,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1501393059840000.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}