{ "best_metric": 1.3322960138320923, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.05658108777141241, "eval_steps": 100, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00028290543885706204, "eval_loss": 1.673740267753601, "eval_runtime": 227.7898, "eval_samples_per_second": 6.537, "eval_steps_per_second": 3.271, "step": 1 }, { "epoch": 0.0028290543885706202, "grad_norm": 1.5898836851119995, "learning_rate": 2.5e-06, "loss": 1.5848, "step": 10 }, { "epoch": 0.0056581087771412405, "grad_norm": 1.5884448289871216, "learning_rate": 5e-06, "loss": 1.5452, "step": 20 }, { "epoch": 0.008487163165711862, "grad_norm": 1.2885594367980957, "learning_rate": 7.5e-06, "loss": 1.5814, "step": 30 }, { "epoch": 0.011316217554282481, "grad_norm": 1.5312845706939697, "learning_rate": 1e-05, "loss": 1.5392, "step": 40 }, { "epoch": 0.014145271942853102, "grad_norm": 1.4758936166763306, "learning_rate": 1.25e-05, "loss": 1.5315, "step": 50 }, { "epoch": 0.016974326331423723, "grad_norm": 1.223138689994812, "learning_rate": 1.5e-05, "loss": 1.4613, "step": 60 }, { "epoch": 0.01980338071999434, "grad_norm": 1.6201387643814087, "learning_rate": 1.75e-05, "loss": 1.4275, "step": 70 }, { "epoch": 0.022632435108564962, "grad_norm": 1.7035235166549683, "learning_rate": 2e-05, "loss": 1.4017, "step": 80 }, { "epoch": 0.025461489497135583, "grad_norm": 1.5906124114990234, "learning_rate": 2.25e-05, "loss": 1.3736, "step": 90 }, { "epoch": 0.028290543885706204, "grad_norm": 1.6032127141952515, "learning_rate": 2.5e-05, "loss": 1.3443, "step": 100 }, { "epoch": 0.028290543885706204, "eval_loss": 1.3803842067718506, "eval_runtime": 227.8885, "eval_samples_per_second": 6.534, "eval_steps_per_second": 3.269, "step": 100 }, { "epoch": 0.031119598274276822, "grad_norm": 1.742843508720398, "learning_rate": 2.7500000000000004e-05, "loss": 1.3753, "step": 110 }, { "epoch": 0.033948652662847446, "grad_norm": 1.5557827949523926, "learning_rate": 3e-05, "loss": 1.3553, "step": 120 }, { "epoch": 0.036777707051418064, "grad_norm": 1.3856598138809204, "learning_rate": 3.2500000000000004e-05, "loss": 1.3361, "step": 130 }, { "epoch": 0.03960676143998868, "grad_norm": 1.4673396348953247, "learning_rate": 3.5e-05, "loss": 1.383, "step": 140 }, { "epoch": 0.042435815828559306, "grad_norm": 1.6237844228744507, "learning_rate": 3.7500000000000003e-05, "loss": 1.3061, "step": 150 }, { "epoch": 0.045264870217129924, "grad_norm": 1.9737865924835205, "learning_rate": 4e-05, "loss": 1.2873, "step": 160 }, { "epoch": 0.04809392460570054, "grad_norm": 1.3253408670425415, "learning_rate": 4.25e-05, "loss": 1.315, "step": 170 }, { "epoch": 0.050922978994271166, "grad_norm": 1.7338840961456299, "learning_rate": 4.5e-05, "loss": 1.3374, "step": 180 }, { "epoch": 0.053752033382841784, "grad_norm": 2.050339698791504, "learning_rate": 4.75e-05, "loss": 1.2882, "step": 190 }, { "epoch": 0.05658108777141241, "grad_norm": 1.292114019393921, "learning_rate": 5e-05, "loss": 1.3593, "step": 200 }, { "epoch": 0.05658108777141241, "eval_loss": 1.3322960138320923, "eval_runtime": 227.8824, "eval_samples_per_second": 6.534, "eval_steps_per_second": 3.269, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.7353521946624e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }