{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.19603038470963, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00098015192354815, "eval_loss": 10.373798370361328, "eval_runtime": 1.1582, "eval_samples_per_second": 371.252, "eval_steps_per_second": 185.626, "step": 1 }, { "epoch": 0.0098015192354815, "grad_norm": 0.036782316863536835, "learning_rate": 0.0002, "loss": 10.3749, "step": 10 }, { "epoch": 0.019603038470963, "grad_norm": 0.03539657965302467, "learning_rate": 0.0002, "loss": 10.3727, "step": 20 }, { "epoch": 0.029404557706444498, "grad_norm": 0.05783862620592117, "learning_rate": 0.0002, "loss": 10.3724, "step": 30 }, { "epoch": 0.039206076941926, "grad_norm": 0.054115310311317444, "learning_rate": 0.0002, "loss": 10.3728, "step": 40 }, { "epoch": 0.0490075961774075, "grad_norm": 0.07443208992481232, "learning_rate": 0.0002, "loss": 10.3677, "step": 50 }, { "epoch": 0.0490075961774075, "eval_loss": 10.365926742553711, "eval_runtime": 1.1706, "eval_samples_per_second": 367.33, "eval_steps_per_second": 183.665, "step": 50 }, { "epoch": 0.058809115412888996, "grad_norm": 0.08249140530824661, "learning_rate": 0.0002, "loss": 10.366, "step": 60 }, { "epoch": 0.0686106346483705, "grad_norm": 0.10592123121023178, "learning_rate": 0.0002, "loss": 10.3601, "step": 70 }, { "epoch": 0.078412153883852, "grad_norm": 0.09555134922266006, "learning_rate": 0.0002, "loss": 10.3583, "step": 80 }, { "epoch": 0.0882136731193335, "grad_norm": 0.10184665769338608, "learning_rate": 0.0002, "loss": 10.3525, "step": 90 }, { "epoch": 0.098015192354815, "grad_norm": 0.11435921490192413, "learning_rate": 0.0002, "loss": 10.3406, "step": 100 }, { "epoch": 0.098015192354815, "eval_loss": 10.337488174438477, "eval_runtime": 1.1718, "eval_samples_per_second": 366.95, "eval_steps_per_second": 183.475, "step": 100 }, { "epoch": 0.1078167115902965, "grad_norm": 0.08737977594137192, "learning_rate": 0.0002, "loss": 10.3371, "step": 110 }, { "epoch": 0.11761823082577799, "grad_norm": 0.08922211825847626, "learning_rate": 0.0002, "loss": 10.3303, "step": 120 }, { "epoch": 0.1274197500612595, "grad_norm": 0.05983508378267288, "learning_rate": 0.0002, "loss": 10.333, "step": 130 }, { "epoch": 0.137221269296741, "grad_norm": 0.07052959501743317, "learning_rate": 0.0002, "loss": 10.3318, "step": 140 }, { "epoch": 0.1470227885322225, "grad_norm": 0.08523882925510406, "learning_rate": 0.0002, "loss": 10.3309, "step": 150 }, { "epoch": 0.1470227885322225, "eval_loss": 10.32862663269043, "eval_runtime": 1.2288, "eval_samples_per_second": 349.922, "eval_steps_per_second": 174.961, "step": 150 }, { "epoch": 0.156824307767704, "grad_norm": 0.05235796049237251, "learning_rate": 0.0002, "loss": 10.3316, "step": 160 }, { "epoch": 0.16662582700318548, "grad_norm": 0.074577696621418, "learning_rate": 0.0002, "loss": 10.3277, "step": 170 }, { "epoch": 0.176427346238667, "grad_norm": 0.07419592142105103, "learning_rate": 0.0002, "loss": 10.3274, "step": 180 }, { "epoch": 0.1862288654741485, "grad_norm": 0.05108886584639549, "learning_rate": 0.0002, "loss": 10.3287, "step": 190 }, { "epoch": 0.19603038470963, "grad_norm": 0.1003885269165039, "learning_rate": 0.0002, "loss": 10.3248, "step": 200 }, { "epoch": 0.19603038470963, "eval_loss": 10.323884963989258, "eval_runtime": 1.2027, "eval_samples_per_second": 357.533, "eval_steps_per_second": 178.766, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2586181632000.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }