{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.18018018018018017, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0009009009009009009, "eval_loss": 0.91037517786026, "eval_runtime": 35.0182, "eval_samples_per_second": 13.364, "eval_steps_per_second": 6.682, "step": 1 }, { "epoch": 0.0045045045045045045, "grad_norm": 0.8114346265792847, "learning_rate": 5e-05, "loss": 3.5218, "step": 5 }, { "epoch": 0.009009009009009009, "grad_norm": 0.894910454750061, "learning_rate": 0.0001, "loss": 3.4258, "step": 10 }, { "epoch": 0.013513513513513514, "grad_norm": 1.7237918376922607, "learning_rate": 9.98292246503335e-05, "loss": 3.5609, "step": 15 }, { "epoch": 0.018018018018018018, "grad_norm": 1.525421380996704, "learning_rate": 9.931806517013612e-05, "loss": 3.4364, "step": 20 }, { "epoch": 0.02252252252252252, "grad_norm": 2.1673266887664795, "learning_rate": 9.847001329696653e-05, "loss": 3.0213, "step": 25 }, { "epoch": 0.02702702702702703, "grad_norm": 2.467672109603882, "learning_rate": 9.729086208503174e-05, "loss": 3.2581, "step": 30 }, { "epoch": 0.03153153153153153, "grad_norm": 1.993414044380188, "learning_rate": 9.578866633275288e-05, "loss": 2.8398, "step": 35 }, { "epoch": 0.036036036036036036, "grad_norm": 2.092423439025879, "learning_rate": 9.397368756032445e-05, "loss": 2.5646, "step": 40 }, { "epoch": 0.04054054054054054, "grad_norm": 2.6000351905822754, "learning_rate": 9.185832391312644e-05, "loss": 2.7034, "step": 45 }, { "epoch": 0.04504504504504504, "grad_norm": 3.2636234760284424, "learning_rate": 8.945702546981969e-05, "loss": 2.5391, "step": 50 }, { "epoch": 0.04504504504504504, "eval_loss": 0.671043872833252, "eval_runtime": 35.5873, "eval_samples_per_second": 13.151, "eval_steps_per_second": 6.575, "step": 50 }, { "epoch": 0.04954954954954955, "grad_norm": 2.2121753692626953, "learning_rate": 8.678619553365659e-05, "loss": 3.0284, "step": 55 }, { "epoch": 0.05405405405405406, "grad_norm": 1.6796329021453857, "learning_rate": 8.386407858128706e-05, "loss": 2.949, "step": 60 }, { "epoch": 0.05855855855855856, "grad_norm": 1.8998647928237915, "learning_rate": 8.07106356344834e-05, "loss": 2.8002, "step": 65 }, { "epoch": 0.06306306306306306, "grad_norm": 1.9170352220535278, "learning_rate": 7.734740790612136e-05, "loss": 2.7579, "step": 70 }, { "epoch": 0.06756756756756757, "grad_norm": 2.008878707885742, "learning_rate": 7.379736965185368e-05, "loss": 2.893, "step": 75 }, { "epoch": 0.07207207207207207, "grad_norm": 2.4726173877716064, "learning_rate": 7.008477123264848e-05, "loss": 2.6539, "step": 80 }, { "epoch": 0.07657657657657657, "grad_norm": 2.354423761367798, "learning_rate": 6.623497346023418e-05, "loss": 2.4179, "step": 85 }, { "epoch": 0.08108108108108109, "grad_norm": 2.5047824382781982, "learning_rate": 6.227427435703997e-05, "loss": 2.6915, "step": 90 }, { "epoch": 0.08558558558558559, "grad_norm": 2.5813536643981934, "learning_rate": 5.8229729514036705e-05, "loss": 2.5024, "step": 95 }, { "epoch": 0.09009009009009009, "grad_norm": 3.199880361557007, "learning_rate": 5.4128967273616625e-05, "loss": 2.1346, "step": 100 }, { "epoch": 0.09009009009009009, "eval_loss": 0.630311906337738, "eval_runtime": 35.5839, "eval_samples_per_second": 13.152, "eval_steps_per_second": 6.576, "step": 100 }, { "epoch": 0.0945945945945946, "grad_norm": 2.01957368850708, "learning_rate": 5e-05, "loss": 2.8427, "step": 105 }, { "epoch": 0.0990990990990991, "grad_norm": 1.7157238721847534, "learning_rate": 4.5871032726383386e-05, "loss": 2.6927, "step": 110 }, { "epoch": 0.1036036036036036, "grad_norm": 1.892593264579773, "learning_rate": 4.17702704859633e-05, "loss": 2.7364, "step": 115 }, { "epoch": 0.10810810810810811, "grad_norm": 1.8269039392471313, "learning_rate": 3.772572564296005e-05, "loss": 2.6854, "step": 120 }, { "epoch": 0.11261261261261261, "grad_norm": 2.3659868240356445, "learning_rate": 3.3765026539765834e-05, "loss": 2.4613, "step": 125 }, { "epoch": 0.11711711711711711, "grad_norm": 2.2974438667297363, "learning_rate": 2.991522876735154e-05, "loss": 2.5113, "step": 130 }, { "epoch": 0.12162162162162163, "grad_norm": 2.6664466857910156, "learning_rate": 2.6202630348146324e-05, "loss": 2.4434, "step": 135 }, { "epoch": 0.12612612612612611, "grad_norm": 2.160093069076538, "learning_rate": 2.2652592093878666e-05, "loss": 2.4086, "step": 140 }, { "epoch": 0.13063063063063063, "grad_norm": 2.7726094722747803, "learning_rate": 1.928936436551661e-05, "loss": 2.5122, "step": 145 }, { "epoch": 0.13513513513513514, "grad_norm": 2.753997325897217, "learning_rate": 1.6135921418712956e-05, "loss": 2.0832, "step": 150 }, { "epoch": 0.13513513513513514, "eval_loss": 0.6134156584739685, "eval_runtime": 35.6006, "eval_samples_per_second": 13.146, "eval_steps_per_second": 6.573, "step": 150 }, { "epoch": 0.13963963963963963, "grad_norm": 2.1443870067596436, "learning_rate": 1.3213804466343421e-05, "loss": 2.7748, "step": 155 }, { "epoch": 0.14414414414414414, "grad_norm": 2.107656240463257, "learning_rate": 1.0542974530180327e-05, "loss": 2.7739, "step": 160 }, { "epoch": 0.14864864864864866, "grad_norm": 1.9297782182693481, "learning_rate": 8.141676086873572e-06, "loss": 2.5551, "step": 165 }, { "epoch": 0.15315315315315314, "grad_norm": 2.201042890548706, "learning_rate": 6.026312439675552e-06, "loss": 2.6412, "step": 170 }, { "epoch": 0.15765765765765766, "grad_norm": 2.3642938137054443, "learning_rate": 4.2113336672471245e-06, "loss": 2.6353, "step": 175 }, { "epoch": 0.16216216216216217, "grad_norm": 2.2860445976257324, "learning_rate": 2.7091379149682685e-06, "loss": 2.6157, "step": 180 }, { "epoch": 0.16666666666666666, "grad_norm": 2.4721200466156006, "learning_rate": 1.5299867030334814e-06, "loss": 2.4462, "step": 185 }, { "epoch": 0.17117117117117117, "grad_norm": 2.7481462955474854, "learning_rate": 6.819348298638839e-07, "loss": 2.5367, "step": 190 }, { "epoch": 0.17567567567567569, "grad_norm": 2.6995551586151123, "learning_rate": 1.7077534966650766e-07, "loss": 2.5209, "step": 195 }, { "epoch": 0.18018018018018017, "grad_norm": 3.7238929271698, "learning_rate": 0.0, "loss": 2.0425, "step": 200 }, { "epoch": 0.18018018018018017, "eval_loss": 0.6100401282310486, "eval_runtime": 35.6094, "eval_samples_per_second": 13.143, "eval_steps_per_second": 6.571, "step": 200 } ], "logging_steps": 5, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.51499696816128e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }