{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7619047619047619, "eval_steps": 5, "global_step": 40, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01904761904761905, "eval_loss": 7.428563117980957, "eval_runtime": 0.4979, "eval_samples_per_second": 44.184, "eval_steps_per_second": 12.05, "step": 1 }, { "epoch": 0.05714285714285714, "grad_norm": 2354.0830078125, "learning_rate": 3e-05, "loss": 14.6828, "step": 3 }, { "epoch": 0.09523809523809523, "eval_loss": 7.463829040527344, "eval_runtime": 0.0988, "eval_samples_per_second": 222.589, "eval_steps_per_second": 60.706, "step": 5 }, { "epoch": 0.11428571428571428, "grad_norm": 2209.059814453125, "learning_rate": 6e-05, "loss": 14.6992, "step": 6 }, { "epoch": 0.17142857142857143, "grad_norm": 1919.69482421875, "learning_rate": 9e-05, "loss": 14.8264, "step": 9 }, { "epoch": 0.19047619047619047, "eval_loss": 7.168332099914551, "eval_runtime": 0.0976, "eval_samples_per_second": 225.344, "eval_steps_per_second": 61.458, "step": 10 }, { "epoch": 0.22857142857142856, "grad_norm": 1912.1866455078125, "learning_rate": 0.00012, "loss": 14.0871, "step": 12 }, { "epoch": 0.2857142857142857, "grad_norm": 2132.776123046875, "learning_rate": 0.00015000000000000001, "loss": 13.9853, "step": 15 }, { "epoch": 0.2857142857142857, "eval_loss": 6.985106945037842, "eval_runtime": 0.099, "eval_samples_per_second": 222.313, "eval_steps_per_second": 60.631, "step": 15 }, { "epoch": 0.34285714285714286, "grad_norm": 1974.28076171875, "learning_rate": 0.00018, "loss": 13.8445, "step": 18 }, { "epoch": 0.38095238095238093, "eval_loss": 6.910727024078369, "eval_runtime": 0.0955, "eval_samples_per_second": 230.251, "eval_steps_per_second": 62.796, "step": 20 }, { "epoch": 0.4, "grad_norm": 2037.5367431640625, "learning_rate": 0.00019876883405951377, "loss": 13.7064, "step": 21 }, { "epoch": 0.45714285714285713, "grad_norm": 1787.888671875, "learning_rate": 0.00018090169943749476, "loss": 13.8959, "step": 24 }, { "epoch": 0.47619047619047616, "eval_loss": 6.688287734985352, "eval_runtime": 0.0989, "eval_samples_per_second": 222.419, "eval_steps_per_second": 60.66, "step": 25 }, { "epoch": 0.5142857142857142, "grad_norm": 2035.425537109375, "learning_rate": 0.00014539904997395468, "loss": 13.6203, "step": 27 }, { "epoch": 0.5714285714285714, "grad_norm": 2253.046875, "learning_rate": 0.0001, "loss": 13.3387, "step": 30 }, { "epoch": 0.5714285714285714, "eval_loss": 6.705076217651367, "eval_runtime": 0.1247, "eval_samples_per_second": 176.415, "eval_steps_per_second": 48.113, "step": 30 }, { "epoch": 0.6285714285714286, "grad_norm": 1498.140380859375, "learning_rate": 5.4600950026045326e-05, "loss": 12.9927, "step": 33 }, { "epoch": 0.6666666666666666, "eval_loss": 6.740316390991211, "eval_runtime": 0.0977, "eval_samples_per_second": 225.14, "eval_steps_per_second": 61.402, "step": 35 }, { "epoch": 0.6857142857142857, "grad_norm": 1960.5146484375, "learning_rate": 1.9098300562505266e-05, "loss": 13.6963, "step": 36 }, { "epoch": 0.7428571428571429, "grad_norm": 1050.917724609375, "learning_rate": 1.231165940486234e-06, "loss": 13.3819, "step": 39 }, { "epoch": 0.7619047619047619, "eval_loss": 6.635408401489258, "eval_runtime": 0.0974, "eval_samples_per_second": 225.782, "eval_steps_per_second": 61.577, "step": 40 } ], "logging_steps": 3, "max_steps": 40, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 15385378160640.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }