{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.973821989528796,
  "eval_steps": 500,
  "global_step": 285,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.10471204188481675,
      "grad_norm": 1.821414025646637,
      "learning_rate": 1.5517241379310346e-05,
      "loss": 0.4862,
      "step": 10
    },
    {
      "epoch": 0.2094240837696335,
      "grad_norm": 0.3942393541183772,
      "learning_rate": 3.275862068965517e-05,
      "loss": 0.3896,
      "step": 20
    },
    {
      "epoch": 0.31413612565445026,
      "grad_norm": 0.8255719356243104,
      "learning_rate": 5e-05,
      "loss": 0.3473,
      "step": 30
    },
    {
      "epoch": 0.418848167539267,
      "grad_norm": 0.37799831218550856,
      "learning_rate": 4.981198836496775e-05,
      "loss": 0.3207,
      "step": 40
    },
    {
      "epoch": 0.5235602094240838,
      "grad_norm": 0.2838496737375545,
      "learning_rate": 4.9250781329863606e-05,
      "loss": 0.3069,
      "step": 50
    },
    {
      "epoch": 0.6282722513089005,
      "grad_norm": 0.35016464634119265,
      "learning_rate": 4.8324819970868473e-05,
      "loss": 0.2959,
      "step": 60
    },
    {
      "epoch": 0.7329842931937173,
      "grad_norm": 0.265970946973345,
      "learning_rate": 4.7048031608708876e-05,
      "loss": 0.2888,
      "step": 70
    },
    {
      "epoch": 0.837696335078534,
      "grad_norm": 0.3283794303490618,
      "learning_rate": 4.5439620328789593e-05,
      "loss": 0.2812,
      "step": 80
    },
    {
      "epoch": 0.9424083769633508,
      "grad_norm": 0.2933681591769702,
      "learning_rate": 4.352377813387398e-05,
      "loss": 0.2821,
      "step": 90
    },
    {
      "epoch": 1.0418848167539267,
      "grad_norm": 0.784193010247334,
      "learning_rate": 4.1329321073844415e-05,
      "loss": 0.2646,
      "step": 100
    },
    {
      "epoch": 1.1465968586387434,
      "grad_norm": 0.43774945809310173,
      "learning_rate": 3.888925582549006e-05,
      "loss": 0.2466,
      "step": 110
    },
    {
      "epoch": 1.2513089005235603,
      "grad_norm": 0.24107111209505053,
      "learning_rate": 3.624028324136517e-05,
      "loss": 0.2429,
      "step": 120
    },
    {
      "epoch": 1.356020942408377,
      "grad_norm": 0.20357460785884127,
      "learning_rate": 3.34222463348055e-05,
      "loss": 0.2408,
      "step": 130
    },
    {
      "epoch": 1.4607329842931938,
      "grad_norm": 0.22761632175708865,
      "learning_rate": 3.0477531003921745e-05,
      "loss": 0.2407,
      "step": 140
    },
    {
      "epoch": 1.5654450261780104,
      "grad_norm": 0.201935897320554,
      "learning_rate": 2.7450428508239024e-05,
      "loss": 0.237,
      "step": 150
    },
    {
      "epoch": 1.6701570680628273,
      "grad_norm": 0.17116285919322816,
      "learning_rate": 2.4386469286927196e-05,
      "loss": 0.2366,
      "step": 160
    },
    {
      "epoch": 1.7748691099476441,
      "grad_norm": 0.16436756820682696,
      "learning_rate": 2.1331738138615958e-05,
      "loss": 0.2357,
      "step": 170
    },
    {
      "epoch": 1.8795811518324608,
      "grad_norm": 0.16052539184685044,
      "learning_rate": 1.8332181063127545e-05,
      "loss": 0.2338,
      "step": 180
    },
    {
      "epoch": 1.9842931937172774,
      "grad_norm": 0.1441672107096996,
      "learning_rate": 1.5432914190872757e-05,
      "loss": 0.235,
      "step": 190
    },
    {
      "epoch": 2.0837696335078535,
      "grad_norm": 0.18363079335917978,
      "learning_rate": 1.2677545194255402e-05,
      "loss": 0.207,
      "step": 200
    },
    {
      "epoch": 2.18848167539267,
      "grad_norm": 0.15661072667443002,
      "learning_rate": 1.0107517387689166e-05,
      "loss": 0.2003,
      "step": 210
    },
    {
      "epoch": 2.2931937172774868,
      "grad_norm": 0.13738033977955236,
      "learning_rate": 7.761486381573327e-06,
      "loss": 0.2002,
      "step": 220
    },
    {
      "epoch": 2.3979057591623034,
      "grad_norm": 0.11864758554083908,
      "learning_rate": 5.674738665931575e-06,
      "loss": 0.2016,
      "step": 230
    },
    {
      "epoch": 2.5026178010471205,
      "grad_norm": 0.11174553605583903,
      "learning_rate": 3.878660868757323e-06,
      "loss": 0.2018,
      "step": 240
    },
    {
      "epoch": 2.607329842931937,
      "grad_norm": 0.10468557565480724,
      "learning_rate": 2.4002676719139166e-06,
      "loss": 0.1992,
      "step": 250
    },
    {
      "epoch": 2.712041884816754,
      "grad_norm": 0.11078267776032583,
      "learning_rate": 1.2617954851740832e-06,
      "loss": 0.1994,
      "step": 260
    },
    {
      "epoch": 2.816753926701571,
      "grad_norm": 0.09962367034420198,
      "learning_rate": 4.803679899192392e-07,
      "loss": 0.2002,
      "step": 270
    },
    {
      "epoch": 2.9214659685863875,
      "grad_norm": 0.10060873138847887,
      "learning_rate": 6.773858303274483e-08,
      "loss": 0.1993,
      "step": 280
    },
    {
      "epoch": 2.973821989528796,
      "step": 285,
      "total_flos": 2039941919932416.0,
      "train_loss": 0.25715315007326894,
      "train_runtime": 42232.1226,
      "train_samples_per_second": 3.469,
      "train_steps_per_second": 0.007
    }
  ],
  "logging_steps": 10,
  "max_steps": 285,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2039941919932416.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}