{
  "best_metric": null,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.6888633754305397,
  "eval_steps": 50,
  "global_step": 75,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.009184845005740528,
      "grad_norm": null,
      "learning_rate": 5e-06,
      "loss": 0.0,
      "step": 1
    },
    {
      "epoch": 0.009184845005740528,
      "eval_loss": null,
      "eval_runtime": 7.7223,
      "eval_samples_per_second": 47.525,
      "eval_steps_per_second": 5.957,
      "step": 1
    },
    {
      "epoch": 0.018369690011481057,
      "grad_norm": null,
      "learning_rate": 1e-05,
      "loss": 0.0,
      "step": 2
    },
    {
      "epoch": 0.027554535017221583,
      "grad_norm": null,
      "learning_rate": 1.5e-05,
      "loss": 0.0,
      "step": 3
    },
    {
      "epoch": 0.03673938002296211,
      "grad_norm": null,
      "learning_rate": 2e-05,
      "loss": 0.0,
      "step": 4
    },
    {
      "epoch": 0.045924225028702644,
      "grad_norm": null,
      "learning_rate": 2.5e-05,
      "loss": 0.0,
      "step": 5
    },
    {
      "epoch": 0.05510907003444317,
      "grad_norm": null,
      "learning_rate": 3e-05,
      "loss": 0.0,
      "step": 6
    },
    {
      "epoch": 0.0642939150401837,
      "grad_norm": null,
      "learning_rate": 3.5e-05,
      "loss": 0.0,
      "step": 7
    },
    {
      "epoch": 0.07347876004592423,
      "grad_norm": null,
      "learning_rate": 4e-05,
      "loss": 0.0,
      "step": 8
    },
    {
      "epoch": 0.08266360505166476,
      "grad_norm": null,
      "learning_rate": 4.5e-05,
      "loss": 0.0,
      "step": 9
    },
    {
      "epoch": 0.09184845005740529,
      "grad_norm": null,
      "learning_rate": 5e-05,
      "loss": 0.0,
      "step": 10
    },
    {
      "epoch": 0.1010332950631458,
      "grad_norm": null,
      "learning_rate": 5.500000000000001e-05,
      "loss": 0.0,
      "step": 11
    },
    {
      "epoch": 0.11021814006888633,
      "grad_norm": null,
      "learning_rate": 6e-05,
      "loss": 0.0,
      "step": 12
    },
    {
      "epoch": 0.11940298507462686,
      "grad_norm": null,
      "learning_rate": 6.500000000000001e-05,
      "loss": 0.0,
      "step": 13
    },
    {
      "epoch": 0.1285878300803674,
      "grad_norm": null,
      "learning_rate": 7e-05,
      "loss": 0.0,
      "step": 14
    },
    {
      "epoch": 0.1377726750861079,
      "grad_norm": null,
      "learning_rate": 7.500000000000001e-05,
      "loss": 0.0,
      "step": 15
    },
    {
      "epoch": 0.14695752009184845,
      "grad_norm": null,
      "learning_rate": 8e-05,
      "loss": 0.0,
      "step": 16
    },
    {
      "epoch": 0.15614236509758897,
      "grad_norm": null,
      "learning_rate": 8.5e-05,
      "loss": 0.0,
      "step": 17
    },
    {
      "epoch": 0.1653272101033295,
      "grad_norm": null,
      "learning_rate": 9e-05,
      "loss": 0.0,
      "step": 18
    },
    {
      "epoch": 0.17451205510907003,
      "grad_norm": null,
      "learning_rate": 9.5e-05,
      "loss": 0.0,
      "step": 19
    },
    {
      "epoch": 0.18369690011481057,
      "grad_norm": null,
      "learning_rate": 0.0001,
      "loss": 0.0,
      "step": 20
    },
    {
      "epoch": 0.1928817451205511,
      "grad_norm": null,
      "learning_rate": 9.991845519630678e-05,
      "loss": 0.0,
      "step": 21
    },
    {
      "epoch": 0.2020665901262916,
      "grad_norm": null,
      "learning_rate": 9.967408676742751e-05,
      "loss": 0.0,
      "step": 22
    },
    {
      "epoch": 0.21125143513203215,
      "grad_norm": null,
      "learning_rate": 9.926769179238466e-05,
      "loss": 0.0,
      "step": 23
    },
    {
      "epoch": 0.22043628013777267,
      "grad_norm": null,
      "learning_rate": 9.870059584711668e-05,
      "loss": 0.0,
      "step": 24
    },
    {
      "epoch": 0.2296211251435132,
      "grad_norm": null,
      "learning_rate": 9.797464868072488e-05,
      "loss": 0.0,
      "step": 25
    },
    {
      "epoch": 0.23880597014925373,
      "grad_norm": null,
      "learning_rate": 9.709221818197624e-05,
      "loss": 0.0,
      "step": 26
    },
    {
      "epoch": 0.24799081515499427,
      "grad_norm": null,
      "learning_rate": 9.60561826557425e-05,
      "loss": 0.0,
      "step": 27
    },
    {
      "epoch": 0.2571756601607348,
      "grad_norm": null,
      "learning_rate": 9.486992143456792e-05,
      "loss": 0.0,
      "step": 28
    },
    {
      "epoch": 0.2663605051664753,
      "grad_norm": null,
      "learning_rate": 9.353730385598887e-05,
      "loss": 0.0,
      "step": 29
    },
    {
      "epoch": 0.2755453501722158,
      "grad_norm": null,
      "learning_rate": 9.206267664155907e-05,
      "loss": 0.0,
      "step": 30
    },
    {
      "epoch": 0.2847301951779564,
      "grad_norm": null,
      "learning_rate": 9.045084971874738e-05,
      "loss": 0.0,
      "step": 31
    },
    {
      "epoch": 0.2939150401836969,
      "grad_norm": null,
      "learning_rate": 8.870708053195413e-05,
      "loss": 0.0,
      "step": 32
    },
    {
      "epoch": 0.3030998851894374,
      "grad_norm": null,
      "learning_rate": 8.683705689382024e-05,
      "loss": 0.0,
      "step": 33
    },
    {
      "epoch": 0.31228473019517794,
      "grad_norm": null,
      "learning_rate": 8.484687843276469e-05,
      "loss": 0.0,
      "step": 34
    },
    {
      "epoch": 0.3214695752009185,
      "grad_norm": null,
      "learning_rate": 8.274303669726426e-05,
      "loss": 0.0,
      "step": 35
    },
    {
      "epoch": 0.330654420206659,
      "grad_norm": null,
      "learning_rate": 8.053239398177191e-05,
      "loss": 0.0,
      "step": 36
    },
    {
      "epoch": 0.33983926521239954,
      "grad_norm": null,
      "learning_rate": 7.822216094333847e-05,
      "loss": 0.0,
      "step": 37
    },
    {
      "epoch": 0.34902411021814006,
      "grad_norm": null,
      "learning_rate": 7.58198730819481e-05,
      "loss": 0.0,
      "step": 38
    },
    {
      "epoch": 0.3582089552238806,
      "grad_norm": null,
      "learning_rate": 7.333336616128369e-05,
      "loss": 0.0,
      "step": 39
    },
    {
      "epoch": 0.36739380022962115,
      "grad_norm": null,
      "learning_rate": 7.077075065009433e-05,
      "loss": 0.0,
      "step": 40
    },
    {
      "epoch": 0.37657864523536166,
      "grad_norm": null,
      "learning_rate": 6.814038526753205e-05,
      "loss": 0.0,
      "step": 41
    },
    {
      "epoch": 0.3857634902411022,
      "grad_norm": null,
      "learning_rate": 6.545084971874738e-05,
      "loss": 0.0,
      "step": 42
    },
    {
      "epoch": 0.3949483352468427,
      "grad_norm": null,
      "learning_rate": 6.271091670967436e-05,
      "loss": 0.0,
      "step": 43
    },
    {
      "epoch": 0.4041331802525832,
      "grad_norm": null,
      "learning_rate": 5.992952333228728e-05,
      "loss": 0.0,
      "step": 44
    },
    {
      "epoch": 0.4133180252583238,
      "grad_norm": null,
      "learning_rate": 5.7115741913664264e-05,
      "loss": 0.0,
      "step": 45
    },
    {
      "epoch": 0.4225028702640643,
      "grad_norm": null,
      "learning_rate": 5.427875042394199e-05,
      "loss": 0.0,
      "step": 46
    },
    {
      "epoch": 0.4316877152698048,
      "grad_norm": null,
      "learning_rate": 5.142780253968481e-05,
      "loss": 0.0,
      "step": 47
    },
    {
      "epoch": 0.44087256027554533,
      "grad_norm": null,
      "learning_rate": 4.85721974603152e-05,
      "loss": 0.0,
      "step": 48
    },
    {
      "epoch": 0.4500574052812859,
      "grad_norm": null,
      "learning_rate": 4.5721249576058027e-05,
      "loss": 0.0,
      "step": 49
    },
    {
      "epoch": 0.4592422502870264,
      "grad_norm": null,
      "learning_rate": 4.288425808633575e-05,
      "loss": 0.0,
      "step": 50
    },
    {
      "epoch": 0.4592422502870264,
      "eval_loss": null,
      "eval_runtime": 7.5033,
      "eval_samples_per_second": 48.912,
      "eval_steps_per_second": 6.131,
      "step": 50
    },
    {
      "epoch": 0.46842709529276694,
      "grad_norm": null,
      "learning_rate": 4.007047666771274e-05,
      "loss": 0.0,
      "step": 51
    },
    {
      "epoch": 0.47761194029850745,
      "grad_norm": null,
      "learning_rate": 3.728908329032567e-05,
      "loss": 0.0,
      "step": 52
    },
    {
      "epoch": 0.48679678530424797,
      "grad_norm": null,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 0.0,
      "step": 53
    },
    {
      "epoch": 0.49598163030998854,
      "grad_norm": null,
      "learning_rate": 3.1859614732467954e-05,
      "loss": 0.0,
      "step": 54
    },
    {
      "epoch": 0.505166475315729,
      "grad_norm": null,
      "learning_rate": 2.9229249349905684e-05,
      "loss": 0.0,
      "step": 55
    },
    {
      "epoch": 0.5143513203214696,
      "grad_norm": null,
      "learning_rate": 2.6666633838716314e-05,
      "loss": 0.0,
      "step": 56
    },
    {
      "epoch": 0.5235361653272101,
      "grad_norm": null,
      "learning_rate": 2.418012691805191e-05,
      "loss": 0.0,
      "step": 57
    },
    {
      "epoch": 0.5327210103329506,
      "grad_norm": null,
      "learning_rate": 2.1777839056661554e-05,
      "loss": 0.0,
      "step": 58
    },
    {
      "epoch": 0.5419058553386912,
      "grad_norm": null,
      "learning_rate": 1.946760601822809e-05,
      "loss": 0.0,
      "step": 59
    },
    {
      "epoch": 0.5510907003444316,
      "grad_norm": null,
      "learning_rate": 1.725696330273575e-05,
      "loss": 0.0,
      "step": 60
    },
    {
      "epoch": 0.5602755453501722,
      "grad_norm": null,
      "learning_rate": 1.5153121567235335e-05,
      "loss": 0.0,
      "step": 61
    },
    {
      "epoch": 0.5694603903559128,
      "grad_norm": null,
      "learning_rate": 1.3162943106179749e-05,
      "loss": 0.0,
      "step": 62
    },
    {
      "epoch": 0.5786452353616532,
      "grad_norm": null,
      "learning_rate": 1.1292919468045877e-05,
      "loss": 0.0,
      "step": 63
    },
    {
      "epoch": 0.5878300803673938,
      "grad_norm": null,
      "learning_rate": 9.549150281252633e-06,
      "loss": 0.0,
      "step": 64
    },
    {
      "epoch": 0.5970149253731343,
      "grad_norm": null,
      "learning_rate": 7.937323358440935e-06,
      "loss": 0.0,
      "step": 65
    },
    {
      "epoch": 0.6061997703788748,
      "grad_norm": null,
      "learning_rate": 6.462696144011149e-06,
      "loss": 0.0,
      "step": 66
    },
    {
      "epoch": 0.6153846153846154,
      "grad_norm": null,
      "learning_rate": 5.13007856543209e-06,
      "loss": 0.0,
      "step": 67
    },
    {
      "epoch": 0.6245694603903559,
      "grad_norm": null,
      "learning_rate": 3.9438173442575e-06,
      "loss": 0.0,
      "step": 68
    },
    {
      "epoch": 0.6337543053960965,
      "grad_norm": null,
      "learning_rate": 2.9077818180237693e-06,
      "loss": 0.0,
      "step": 69
    },
    {
      "epoch": 0.642939150401837,
      "grad_norm": null,
      "learning_rate": 2.0253513192751373e-06,
      "loss": 0.0,
      "step": 70
    },
    {
      "epoch": 0.6521239954075775,
      "grad_norm": null,
      "learning_rate": 1.2994041528833266e-06,
      "loss": 0.0,
      "step": 71
    },
    {
      "epoch": 0.661308840413318,
      "grad_norm": null,
      "learning_rate": 7.323082076153509e-07,
      "loss": 0.0,
      "step": 72
    },
    {
      "epoch": 0.6704936854190585,
      "grad_norm": null,
      "learning_rate": 3.2591323257248893e-07,
      "loss": 0.0,
      "step": 73
    },
    {
      "epoch": 0.6796785304247991,
      "grad_norm": null,
      "learning_rate": 8.15448036932176e-08,
      "loss": 0.0,
      "step": 74
    },
    {
      "epoch": 0.6888633754305397,
      "grad_norm": null,
      "learning_rate": 0.0,
      "loss": 0.0,
      "step": 75
    }
  ],
  "logging_steps": 1,
  "max_steps": 75,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 369937612800.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}