| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.7619047619047619, | |
| "eval_steps": 5, | |
| "global_step": 40, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01904761904761905, | |
| "eval_loss": 7.428563117980957, | |
| "eval_runtime": 0.4979, | |
| "eval_samples_per_second": 44.184, | |
| "eval_steps_per_second": 12.05, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.05714285714285714, | |
| "grad_norm": 2354.0830078125, | |
| "learning_rate": 3e-05, | |
| "loss": 14.6828, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.09523809523809523, | |
| "eval_loss": 7.463829040527344, | |
| "eval_runtime": 0.0988, | |
| "eval_samples_per_second": 222.589, | |
| "eval_steps_per_second": 60.706, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.11428571428571428, | |
| "grad_norm": 2209.059814453125, | |
| "learning_rate": 6e-05, | |
| "loss": 14.6992, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.17142857142857143, | |
| "grad_norm": 1919.69482421875, | |
| "learning_rate": 9e-05, | |
| "loss": 14.8264, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.19047619047619047, | |
| "eval_loss": 7.168332099914551, | |
| "eval_runtime": 0.0976, | |
| "eval_samples_per_second": 225.344, | |
| "eval_steps_per_second": 61.458, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.22857142857142856, | |
| "grad_norm": 1912.1866455078125, | |
| "learning_rate": 0.00012, | |
| "loss": 14.0871, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 2132.776123046875, | |
| "learning_rate": 0.00015000000000000001, | |
| "loss": 13.9853, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "eval_loss": 6.985106945037842, | |
| "eval_runtime": 0.099, | |
| "eval_samples_per_second": 222.313, | |
| "eval_steps_per_second": 60.631, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.34285714285714286, | |
| "grad_norm": 1974.28076171875, | |
| "learning_rate": 0.00018, | |
| "loss": 13.8445, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.38095238095238093, | |
| "eval_loss": 6.910727024078369, | |
| "eval_runtime": 0.0955, | |
| "eval_samples_per_second": 230.251, | |
| "eval_steps_per_second": 62.796, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 2037.5367431640625, | |
| "learning_rate": 0.00019876883405951377, | |
| "loss": 13.7064, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.45714285714285713, | |
| "grad_norm": 1787.888671875, | |
| "learning_rate": 0.00018090169943749476, | |
| "loss": 13.8959, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.47619047619047616, | |
| "eval_loss": 6.688287734985352, | |
| "eval_runtime": 0.0989, | |
| "eval_samples_per_second": 222.419, | |
| "eval_steps_per_second": 60.66, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.5142857142857142, | |
| "grad_norm": 2035.425537109375, | |
| "learning_rate": 0.00014539904997395468, | |
| "loss": 13.6203, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 2253.046875, | |
| "learning_rate": 0.0001, | |
| "loss": 13.3387, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "eval_loss": 6.705076217651367, | |
| "eval_runtime": 0.1247, | |
| "eval_samples_per_second": 176.415, | |
| "eval_steps_per_second": 48.113, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.6285714285714286, | |
| "grad_norm": 1498.140380859375, | |
| "learning_rate": 5.4600950026045326e-05, | |
| "loss": 12.9927, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "eval_loss": 6.740316390991211, | |
| "eval_runtime": 0.0977, | |
| "eval_samples_per_second": 225.14, | |
| "eval_steps_per_second": 61.402, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.6857142857142857, | |
| "grad_norm": 1960.5146484375, | |
| "learning_rate": 1.9098300562505266e-05, | |
| "loss": 13.6963, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.7428571428571429, | |
| "grad_norm": 1050.917724609375, | |
| "learning_rate": 1.231165940486234e-06, | |
| "loss": 13.3819, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.7619047619047619, | |
| "eval_loss": 6.635408401489258, | |
| "eval_runtime": 0.0974, | |
| "eval_samples_per_second": 225.782, | |
| "eval_steps_per_second": 61.577, | |
| "step": 40 | |
| } | |
| ], | |
| "logging_steps": 3, | |
| "max_steps": 40, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 20, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 15385378160640.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |