| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.0018355949392647524, | |
| "eval_steps": 50, | |
| "global_step": 200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 9.177974696323763e-06, | |
| "eval_loss": 4.806329250335693, | |
| "eval_runtime": 720.8581, | |
| "eval_samples_per_second": 63.642, | |
| "eval_steps_per_second": 31.822, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 9.177974696323762e-05, | |
| "grad_norm": 4.923719882965088, | |
| "learning_rate": 0.0002, | |
| "loss": 4.1985, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.00018355949392647524, | |
| "grad_norm": 4.686183452606201, | |
| "learning_rate": 0.0002, | |
| "loss": 3.1998, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.00027533924088971284, | |
| "grad_norm": 5.875890254974365, | |
| "learning_rate": 0.0002, | |
| "loss": 3.1813, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.00036711898785295047, | |
| "grad_norm": 3.501814126968384, | |
| "learning_rate": 0.0002, | |
| "loss": 2.7797, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0004588987348161881, | |
| "grad_norm": 6.772341251373291, | |
| "learning_rate": 0.0002, | |
| "loss": 3.0822, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0004588987348161881, | |
| "eval_loss": 2.9345030784606934, | |
| "eval_runtime": 721.0163, | |
| "eval_samples_per_second": 63.628, | |
| "eval_steps_per_second": 31.815, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0005506784817794257, | |
| "grad_norm": 6.173509120941162, | |
| "learning_rate": 0.0002, | |
| "loss": 3.0467, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.0006424582287426633, | |
| "grad_norm": 2.9664249420166016, | |
| "learning_rate": 0.0002, | |
| "loss": 3.0159, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.0007342379757059009, | |
| "grad_norm": 3.5428833961486816, | |
| "learning_rate": 0.0002, | |
| "loss": 3.0559, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.0008260177226691386, | |
| "grad_norm": 4.495511054992676, | |
| "learning_rate": 0.0002, | |
| "loss": 2.8184, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.0009177974696323762, | |
| "grad_norm": 4.205393314361572, | |
| "learning_rate": 0.0002, | |
| "loss": 2.8674, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0009177974696323762, | |
| "eval_loss": 2.848527431488037, | |
| "eval_runtime": 719.5853, | |
| "eval_samples_per_second": 63.755, | |
| "eval_steps_per_second": 31.878, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0010095772165956138, | |
| "grad_norm": 5.495555877685547, | |
| "learning_rate": 0.0002, | |
| "loss": 2.8344, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.0011013569635588514, | |
| "grad_norm": 4.5758376121521, | |
| "learning_rate": 0.0002, | |
| "loss": 3.0487, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.001193136710522089, | |
| "grad_norm": 5.466923713684082, | |
| "learning_rate": 0.0002, | |
| "loss": 2.888, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.0012849164574853266, | |
| "grad_norm": 3.5546038150787354, | |
| "learning_rate": 0.0002, | |
| "loss": 2.8088, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.0013766962044485644, | |
| "grad_norm": 4.920299053192139, | |
| "learning_rate": 0.0002, | |
| "loss": 2.7983, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0013766962044485644, | |
| "eval_loss": 2.8186566829681396, | |
| "eval_runtime": 721.1975, | |
| "eval_samples_per_second": 63.612, | |
| "eval_steps_per_second": 31.807, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0014684759514118019, | |
| "grad_norm": 4.073799133300781, | |
| "learning_rate": 0.0002, | |
| "loss": 2.6564, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.0015602556983750396, | |
| "grad_norm": 4.067333221435547, | |
| "learning_rate": 0.0002, | |
| "loss": 2.609, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.0016520354453382772, | |
| "grad_norm": 5.727188587188721, | |
| "learning_rate": 0.0002, | |
| "loss": 2.7113, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.001743815192301515, | |
| "grad_norm": 4.845877647399902, | |
| "learning_rate": 0.0002, | |
| "loss": 2.8146, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.0018355949392647524, | |
| "grad_norm": 4.398877143859863, | |
| "learning_rate": 0.0002, | |
| "loss": 2.8552, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0018355949392647524, | |
| "eval_loss": 2.7687087059020996, | |
| "eval_runtime": 719.1671, | |
| "eval_samples_per_second": 63.792, | |
| "eval_steps_per_second": 31.897, | |
| "step": 200 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 200, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 9789555867648000.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |