{ "best_global_step": 1000, "best_metric": 0.3800264000892639, "best_model_checkpoint": "check-point/checkpoint-1000", "epoch": 7.763975155279503, "eval_steps": 500, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.5527950310559007, "eval_entropy": 0.36121490036231885, "eval_loss": 0.3807576298713684, "eval_mean_token_accuracy": 0.8985943699228591, "eval_num_tokens": 8180731.0, "eval_runtime": 8.7103, "eval_samples_per_second": 126.632, "eval_steps_per_second": 7.922, "step": 500 }, { "entropy": 0.45297335986024845, "epoch": 2.0, "grad_norm": 0.74609375, "learning_rate": 0.00016519230769230768, "loss": 0.4535, "mean_token_accuracy": 0.8832034343703193, "num_tokens": 10531260.0, "step": 644 }, { "epoch": 3.1055900621118013, "eval_entropy": 0.24365942028985507, "eval_loss": 0.3800264000892639, "eval_mean_token_accuracy": 0.9076558178749637, "eval_num_tokens": 16353946.0, "eval_runtime": 8.7106, "eval_samples_per_second": 126.627, "eval_steps_per_second": 7.921, "step": 1000 }, { "entropy": 0.2585039790372671, "epoch": 4.0, "grad_norm": 0.65234375, "learning_rate": 0.00012391025641025641, "loss": 0.2582, "mean_token_accuracy": 0.9224253959537293, "num_tokens": 21062520.0, "step": 1288 }, { "epoch": 4.658385093167702, "eval_entropy": 0.19808650362318841, "eval_loss": 0.3817984163761139, "eval_mean_token_accuracy": 0.9157982235369475, "eval_num_tokens": 24533627.0, "eval_runtime": 8.7626, "eval_samples_per_second": 125.876, "eval_steps_per_second": 7.874, "step": 1500 }, { "entropy": 0.13021414620535715, "epoch": 6.0, "grad_norm": 0.81640625, "learning_rate": 8.262820512820512e-05, "loss": 0.125, "mean_token_accuracy": 0.9599104345955464, "num_tokens": 31593780.0, "step": 1932 }, { "epoch": 6.211180124223603, "eval_entropy": 0.11409505208333333, "eval_loss": 0.4657194912433624, "eval_mean_token_accuracy": 0.9217576470927916, "eval_num_tokens": 32707230.0, "eval_runtime": 12.0522, "eval_samples_per_second": 91.519, "eval_steps_per_second": 5.725, "step": 2000 }, { "epoch": 7.763975155279503, "eval_entropy": 0.09595434216485507, "eval_loss": 0.5112115144729614, "eval_mean_token_accuracy": 0.9232899140620577, "eval_num_tokens": 40887516.0, "eval_runtime": 12.0489, "eval_samples_per_second": 91.544, "eval_steps_per_second": 5.727, "step": 2500 } ], "logging_steps": 644, "max_steps": 3220, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.784869136228557e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }