{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.973821989528796, "eval_steps": 500, "global_step": 285, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10471204188481675, "grad_norm": 1.821414025646637, "learning_rate": 1.5517241379310346e-05, "loss": 0.4862, "step": 10 }, { "epoch": 0.2094240837696335, "grad_norm": 0.3942393541183772, "learning_rate": 3.275862068965517e-05, "loss": 0.3896, "step": 20 }, { "epoch": 0.31413612565445026, "grad_norm": 0.8255719356243104, "learning_rate": 5e-05, "loss": 0.3473, "step": 30 }, { "epoch": 0.418848167539267, "grad_norm": 0.37799831218550856, "learning_rate": 4.981198836496775e-05, "loss": 0.3207, "step": 40 }, { "epoch": 0.5235602094240838, "grad_norm": 0.2838496737375545, "learning_rate": 4.9250781329863606e-05, "loss": 0.3069, "step": 50 }, { "epoch": 0.6282722513089005, "grad_norm": 0.35016464634119265, "learning_rate": 4.8324819970868473e-05, "loss": 0.2959, "step": 60 }, { "epoch": 0.7329842931937173, "grad_norm": 0.265970946973345, "learning_rate": 4.7048031608708876e-05, "loss": 0.2888, "step": 70 }, { "epoch": 0.837696335078534, "grad_norm": 0.3283794303490618, "learning_rate": 4.5439620328789593e-05, "loss": 0.2812, "step": 80 }, { "epoch": 0.9424083769633508, "grad_norm": 0.2933681591769702, "learning_rate": 4.352377813387398e-05, "loss": 0.2821, "step": 90 }, { "epoch": 1.0418848167539267, "grad_norm": 0.784193010247334, "learning_rate": 4.1329321073844415e-05, "loss": 0.2646, "step": 100 }, { "epoch": 1.1465968586387434, "grad_norm": 0.43774945809310173, "learning_rate": 3.888925582549006e-05, "loss": 0.2466, "step": 110 }, { "epoch": 1.2513089005235603, "grad_norm": 0.24107111209505053, "learning_rate": 3.624028324136517e-05, "loss": 0.2429, "step": 120 }, { "epoch": 1.356020942408377, "grad_norm": 0.20357460785884127, "learning_rate": 3.34222463348055e-05, "loss": 0.2408, "step": 130 }, { "epoch": 1.4607329842931938, "grad_norm": 0.22761632175708865, "learning_rate": 3.0477531003921745e-05, "loss": 0.2407, "step": 140 }, { "epoch": 1.5654450261780104, "grad_norm": 0.201935897320554, "learning_rate": 2.7450428508239024e-05, "loss": 0.237, "step": 150 }, { "epoch": 1.6701570680628273, "grad_norm": 0.17116285919322816, "learning_rate": 2.4386469286927196e-05, "loss": 0.2366, "step": 160 }, { "epoch": 1.7748691099476441, "grad_norm": 0.16436756820682696, "learning_rate": 2.1331738138615958e-05, "loss": 0.2357, "step": 170 }, { "epoch": 1.8795811518324608, "grad_norm": 0.16052539184685044, "learning_rate": 1.8332181063127545e-05, "loss": 0.2338, "step": 180 }, { "epoch": 1.9842931937172774, "grad_norm": 0.1441672107096996, "learning_rate": 1.5432914190872757e-05, "loss": 0.235, "step": 190 }, { "epoch": 2.0837696335078535, "grad_norm": 0.18363079335917978, "learning_rate": 1.2677545194255402e-05, "loss": 0.207, "step": 200 }, { "epoch": 2.18848167539267, "grad_norm": 0.15661072667443002, "learning_rate": 1.0107517387689166e-05, "loss": 0.2003, "step": 210 }, { "epoch": 2.2931937172774868, "grad_norm": 0.13738033977955236, "learning_rate": 7.761486381573327e-06, "loss": 0.2002, "step": 220 }, { "epoch": 2.3979057591623034, "grad_norm": 0.11864758554083908, "learning_rate": 5.674738665931575e-06, "loss": 0.2016, "step": 230 }, { "epoch": 2.5026178010471205, "grad_norm": 0.11174553605583903, "learning_rate": 3.878660868757323e-06, "loss": 0.2018, "step": 240 }, { "epoch": 2.607329842931937, "grad_norm": 0.10468557565480724, "learning_rate": 2.4002676719139166e-06, "loss": 0.1992, "step": 250 }, { "epoch": 2.712041884816754, "grad_norm": 0.11078267776032583, "learning_rate": 1.2617954851740832e-06, "loss": 0.1994, "step": 260 }, { "epoch": 2.816753926701571, "grad_norm": 0.09962367034420198, "learning_rate": 4.803679899192392e-07, "loss": 0.2002, "step": 270 }, { "epoch": 2.9214659685863875, "grad_norm": 0.10060873138847887, "learning_rate": 6.773858303274483e-08, "loss": 0.1993, "step": 280 }, { "epoch": 2.973821989528796, "step": 285, "total_flos": 2039941919932416.0, "train_loss": 0.25715315007326894, "train_runtime": 42232.1226, "train_samples_per_second": 3.469, "train_steps_per_second": 0.007 } ], "logging_steps": 10, "max_steps": 285, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2039941919932416.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }