{
  "best_metric": 0.7818862795829773,
  "best_model_checkpoint": "miner_id_24/checkpoint-2700",
  "epoch": 4.452606195511037,
  "eval_steps": 150,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0014839547393804488,
      "eval_loss": 1.1009923219680786,
      "eval_runtime": 14.0062,
      "eval_samples_per_second": 40.554,
      "eval_steps_per_second": 20.277,
      "step": 1
    },
    {
      "epoch": 0.07419773696902245,
      "grad_norm": 0.1295081079006195,
      "learning_rate": 0.0002,
      "loss": 1.0386,
      "step": 50
    },
    {
      "epoch": 0.1483954739380449,
      "grad_norm": 0.1271422952413559,
      "learning_rate": 0.0001998582695676762,
      "loss": 0.9538,
      "step": 100
    },
    {
      "epoch": 0.22259321090706732,
      "grad_norm": 0.12388234585523605,
      "learning_rate": 0.00019943348002101371,
      "loss": 0.9411,
      "step": 150
    },
    {
      "epoch": 0.22259321090706732,
      "eval_loss": 0.9268936514854431,
      "eval_runtime": 14.0765,
      "eval_samples_per_second": 40.351,
      "eval_steps_per_second": 20.175,
      "step": 150
    },
    {
      "epoch": 0.2967909478760898,
      "grad_norm": 0.13258318603038788,
      "learning_rate": 0.00019872683547213446,
      "loss": 0.9297,
      "step": 200
    },
    {
      "epoch": 0.3709886848451122,
      "grad_norm": 0.1404084414243698,
      "learning_rate": 0.00019774033898178667,
      "loss": 0.9078,
      "step": 250
    },
    {
      "epoch": 0.44518642181413465,
      "grad_norm": 0.14087454974651337,
      "learning_rate": 0.0001964767868814516,
      "loss": 0.9052,
      "step": 300
    },
    {
      "epoch": 0.44518642181413465,
      "eval_loss": 0.8921840786933899,
      "eval_runtime": 14.2434,
      "eval_samples_per_second": 39.878,
      "eval_steps_per_second": 19.939,
      "step": 300
    },
    {
      "epoch": 0.5193841587831571,
      "grad_norm": 0.14395581185817719,
      "learning_rate": 0.00019493976084683813,
      "loss": 0.8842,
      "step": 350
    },
    {
      "epoch": 0.5935818957521796,
      "grad_norm": 0.13259702920913696,
      "learning_rate": 0.00019313361774523385,
      "loss": 0.8869,
      "step": 400
    },
    {
      "epoch": 0.667779632721202,
      "grad_norm": 0.141865536570549,
      "learning_rate": 0.00019106347728549135,
      "loss": 0.8705,
      "step": 450
    },
    {
      "epoch": 0.667779632721202,
      "eval_loss": 0.8677888512611389,
      "eval_runtime": 14.1021,
      "eval_samples_per_second": 40.278,
      "eval_steps_per_second": 20.139,
      "step": 450
    },
    {
      "epoch": 0.7419773696902244,
      "grad_norm": 0.1400303989648819,
      "learning_rate": 0.00018873520750565718,
      "loss": 0.8765,
      "step": 500
    },
    {
      "epoch": 0.8161751066592469,
      "grad_norm": 0.1415148228406906,
      "learning_rate": 0.0001861554081393806,
      "loss": 0.8718,
      "step": 550
    },
    {
      "epoch": 0.8903728436282693,
      "grad_norm": 0.1443643420934677,
      "learning_rate": 0.0001833313919082515,
      "loss": 0.8544,
      "step": 600
    },
    {
      "epoch": 0.8903728436282693,
      "eval_loss": 0.8513836860656738,
      "eval_runtime": 14.4561,
      "eval_samples_per_second": 39.291,
      "eval_steps_per_second": 19.646,
      "step": 600
    },
    {
      "epoch": 0.9645705805972918,
      "grad_norm": 0.15358565747737885,
      "learning_rate": 0.00018027116379309638,
      "loss": 0.855,
      "step": 650
    },
    {
      "epoch": 1.0389538119087367,
      "grad_norm": 0.14816033840179443,
      "learning_rate": 0.00017698339834299061,
      "loss": 0.8524,
      "step": 700
    },
    {
      "epoch": 1.1131515488777592,
      "grad_norm": 0.1639455258846283,
      "learning_rate": 0.00017347741508630672,
      "loss": 0.8167,
      "step": 750
    },
    {
      "epoch": 1.1131515488777592,
      "eval_loss": 0.8397712111473083,
      "eval_runtime": 14.3467,
      "eval_samples_per_second": 39.591,
      "eval_steps_per_second": 19.796,
      "step": 750
    },
    {
      "epoch": 1.1873492858467816,
      "grad_norm": 0.1472490429878235,
      "learning_rate": 0.0001697631521134985,
      "loss": 0.8086,
      "step": 800
    },
    {
      "epoch": 1.261547022815804,
      "grad_norm": 0.16415859758853912,
      "learning_rate": 0.00016585113790650388,
      "loss": 0.802,
      "step": 850
    },
    {
      "epoch": 1.3357447597848267,
      "grad_norm": 0.1668313443660736,
      "learning_rate": 0.0001617524614946192,
      "loss": 0.7961,
      "step": 900
    },
    {
      "epoch": 1.3357447597848267,
      "eval_loss": 0.8281386494636536,
      "eval_runtime": 13.996,
      "eval_samples_per_second": 40.583,
      "eval_steps_per_second": 20.292,
      "step": 900
    },
    {
      "epoch": 1.409942496753849,
      "grad_norm": 0.1581781506538391,
      "learning_rate": 0.0001574787410214407,
      "loss": 0.8142,
      "step": 950
    },
    {
      "epoch": 1.4841402337228715,
      "grad_norm": 0.16410471498966217,
      "learning_rate": 0.00015304209081197425,
      "loss": 0.8208,
      "step": 1000
    },
    {
      "epoch": 1.5583379706918938,
      "grad_norm": 0.16278082132339478,
      "learning_rate": 0.00014845508703326504,
      "loss": 0.7965,
      "step": 1050
    },
    {
      "epoch": 1.5583379706918938,
      "eval_loss": 0.8185766339302063,
      "eval_runtime": 14.096,
      "eval_samples_per_second": 40.295,
      "eval_steps_per_second": 20.148,
      "step": 1050
    },
    {
      "epoch": 1.6325357076609164,
      "grad_norm": 0.1645856499671936,
      "learning_rate": 0.00014373073204588556,
      "loss": 0.796,
      "step": 1100
    },
    {
      "epoch": 1.706733444629939,
      "grad_norm": 0.16889511048793793,
      "learning_rate": 0.00013888241754733208,
      "loss": 0.7972,
      "step": 1150
    },
    {
      "epoch": 1.7809311815989612,
      "grad_norm": 0.16280411183834076,
      "learning_rate": 0.00013392388661180303,
      "loss": 0.7986,
      "step": 1200
    },
    {
      "epoch": 1.7809311815989612,
      "eval_loss": 0.8099638223648071,
      "eval_runtime": 14.0445,
      "eval_samples_per_second": 40.443,
      "eval_steps_per_second": 20.221,
      "step": 1200
    },
    {
      "epoch": 1.8551289185679836,
      "grad_norm": 0.16928894817829132,
      "learning_rate": 0.0001288691947339621,
      "loss": 0.7992,
      "step": 1250
    },
    {
      "epoch": 1.929326655537006,
      "grad_norm": 0.16871026158332825,
      "learning_rate": 0.0001237326699871115,
      "loss": 0.8071,
      "step": 1300
    },
    {
      "epoch": 2.0037098868484513,
      "grad_norm": 0.1630755364894867,
      "learning_rate": 0.00011852887240871145,
      "loss": 0.7548,
      "step": 1350
    },
    {
      "epoch": 2.0037098868484513,
      "eval_loss": 0.8026086688041687,
      "eval_runtime": 14.0517,
      "eval_samples_per_second": 40.422,
      "eval_steps_per_second": 20.211,
      "step": 1350
    },
    {
      "epoch": 2.0779076238174734,
      "grad_norm": 0.16451841592788696,
      "learning_rate": 0.00011327255272837221,
      "loss": 0.7476,
      "step": 1400
    },
    {
      "epoch": 2.152105360786496,
      "grad_norm": 0.18384945392608643,
      "learning_rate": 0.00010797861055530831,
      "loss": 0.7691,
      "step": 1450
    },
    {
      "epoch": 2.2263030977555185,
      "grad_norm": 0.18934905529022217,
      "learning_rate": 0.00010266205214377748,
      "loss": 0.7409,
      "step": 1500
    },
    {
      "epoch": 2.2263030977555185,
      "eval_loss": 0.7997989654541016,
      "eval_runtime": 13.98,
      "eval_samples_per_second": 40.629,
      "eval_steps_per_second": 20.315,
      "step": 1500
    },
    {
      "epoch": 2.300500834724541,
      "grad_norm": 0.18251581490039825,
      "learning_rate": 9.733794785622253e-05,
      "loss": 0.7668,
      "step": 1550
    },
    {
      "epoch": 2.374698571693563,
      "grad_norm": 0.1793128401041031,
      "learning_rate": 9.202138944469168e-05,
      "loss": 0.7456,
      "step": 1600
    },
    {
      "epoch": 2.4488963086625857,
      "grad_norm": 0.17612189054489136,
      "learning_rate": 8.672744727162781e-05,
      "loss": 0.7508,
      "step": 1650
    },
    {
      "epoch": 2.4488963086625857,
      "eval_loss": 0.7954422831535339,
      "eval_runtime": 14.3584,
      "eval_samples_per_second": 39.559,
      "eval_steps_per_second": 19.779,
      "step": 1650
    },
    {
      "epoch": 2.523094045631608,
      "grad_norm": 0.19443200528621674,
      "learning_rate": 8.147112759128859e-05,
      "loss": 0.7682,
      "step": 1700
    },
    {
      "epoch": 2.5972917826006308,
      "grad_norm": 0.1849536895751953,
      "learning_rate": 7.626733001288851e-05,
      "loss": 0.7461,
      "step": 1750
    },
    {
      "epoch": 2.6714895195696533,
      "grad_norm": 0.203544482588768,
      "learning_rate": 7.113080526603792e-05,
      "loss": 0.7606,
      "step": 1800
    },
    {
      "epoch": 2.6714895195696533,
      "eval_loss": 0.7908361554145813,
      "eval_runtime": 14.0191,
      "eval_samples_per_second": 40.516,
      "eval_steps_per_second": 20.258,
      "step": 1800
    },
    {
      "epoch": 2.745687256538676,
      "grad_norm": 0.18425534665584564,
      "learning_rate": 6.607611338819697e-05,
      "loss": 0.7399,
      "step": 1850
    },
    {
      "epoch": 2.819884993507698,
      "grad_norm": 0.18944227695465088,
      "learning_rate": 6.111758245266794e-05,
      "loss": 0.7481,
      "step": 1900
    },
    {
      "epoch": 2.8940827304767205,
      "grad_norm": 0.17936797440052032,
      "learning_rate": 5.626926795411447e-05,
      "loss": 0.7366,
      "step": 1950
    },
    {
      "epoch": 2.8940827304767205,
      "eval_loss": 0.7873192429542542,
      "eval_runtime": 14.1781,
      "eval_samples_per_second": 40.062,
      "eval_steps_per_second": 20.031,
      "step": 1950
    },
    {
      "epoch": 2.968280467445743,
      "grad_norm": 0.19529031217098236,
      "learning_rate": 5.1544912966734994e-05,
      "loss": 0.7521,
      "step": 2000
    },
    {
      "epoch": 3.042663698757188,
      "grad_norm": 0.1839251071214676,
      "learning_rate": 4.695790918802576e-05,
      "loss": 0.76,
      "step": 2050
    },
    {
      "epoch": 3.1168614357262103,
      "grad_norm": 0.18065090477466583,
      "learning_rate": 4.252125897855932e-05,
      "loss": 0.7173,
      "step": 2100
    },
    {
      "epoch": 3.1168614357262103,
      "eval_loss": 0.7872137427330017,
      "eval_runtime": 14.1181,
      "eval_samples_per_second": 40.232,
      "eval_steps_per_second": 20.116,
      "step": 2100
    },
    {
      "epoch": 3.191059172695233,
      "grad_norm": 0.18571385741233826,
      "learning_rate": 3.824753850538082e-05,
      "loss": 0.7305,
      "step": 2150
    },
    {
      "epoch": 3.2652569096642554,
      "grad_norm": 0.182917520403862,
      "learning_rate": 3.414886209349615e-05,
      "loss": 0.7339,
      "step": 2200
    },
    {
      "epoch": 3.3394546466332775,
      "grad_norm": 0.19343669712543488,
      "learning_rate": 3.0236847886501542e-05,
      "loss": 0.7294,
      "step": 2250
    },
    {
      "epoch": 3.3394546466332775,
      "eval_loss": 0.7847491502761841,
      "eval_runtime": 14.0352,
      "eval_samples_per_second": 40.47,
      "eval_steps_per_second": 20.235,
      "step": 2250
    },
    {
      "epoch": 3.4136523836023,
      "grad_norm": 0.19025780260562897,
      "learning_rate": 2.6522584913693294e-05,
      "loss": 0.7251,
      "step": 2300
    },
    {
      "epoch": 3.4878501205713226,
      "grad_norm": 0.19098390638828278,
      "learning_rate": 2.301660165700936e-05,
      "loss": 0.7128,
      "step": 2350
    },
    {
      "epoch": 3.562047857540345,
      "grad_norm": 0.1902209222316742,
      "learning_rate": 1.9728836206903656e-05,
      "loss": 0.7282,
      "step": 2400
    },
    {
      "epoch": 3.562047857540345,
      "eval_loss": 0.7834780216217041,
      "eval_runtime": 13.9633,
      "eval_samples_per_second": 40.678,
      "eval_steps_per_second": 20.339,
      "step": 2400
    },
    {
      "epoch": 3.6362455945093677,
      "grad_norm": 0.19502930343151093,
      "learning_rate": 1.6668608091748495e-05,
      "loss": 0.7328,
      "step": 2450
    },
    {
      "epoch": 3.71044333147839,
      "grad_norm": 0.18348978459835052,
      "learning_rate": 1.3844591860619383e-05,
      "loss": 0.7293,
      "step": 2500
    },
    {
      "epoch": 3.7846410684474123,
      "grad_norm": 0.18518677353858948,
      "learning_rate": 1.1264792494342857e-05,
      "loss": 0.72,
      "step": 2550
    },
    {
      "epoch": 3.7846410684474123,
      "eval_loss": 0.7825514078140259,
      "eval_runtime": 14.1713,
      "eval_samples_per_second": 40.081,
      "eval_steps_per_second": 20.041,
      "step": 2550
    },
    {
      "epoch": 3.858838805416435,
      "grad_norm": 0.1809045374393463,
      "learning_rate": 8.936522714508678e-06,
      "loss": 0.7246,
      "step": 2600
    },
    {
      "epoch": 3.933036542385457,
      "grad_norm": 0.18573115766048431,
      "learning_rate": 6.866382254766157e-06,
      "loss": 0.724,
      "step": 2650
    },
    {
      "epoch": 4.007419773696903,
      "grad_norm": 0.1879529505968094,
      "learning_rate": 5.060239153161872e-06,
      "loss": 0.7085,
      "step": 2700
    },
    {
      "epoch": 4.007419773696903,
      "eval_loss": 0.7818862795829773,
      "eval_runtime": 14.0534,
      "eval_samples_per_second": 40.417,
      "eval_steps_per_second": 20.209,
      "step": 2700
    },
    {
      "epoch": 4.081617510665924,
      "grad_norm": 0.18808983266353607,
      "learning_rate": 3.5232131185484076e-06,
      "loss": 0.7197,
      "step": 2750
    },
    {
      "epoch": 4.155815247634947,
      "grad_norm": 0.1956692934036255,
      "learning_rate": 2.259661018213333e-06,
      "loss": 0.7152,
      "step": 2800
    },
    {
      "epoch": 4.230012984603969,
      "grad_norm": 0.1999446451663971,
      "learning_rate": 1.2731645278655445e-06,
      "loss": 0.7127,
      "step": 2850
    },
    {
      "epoch": 4.230012984603969,
      "eval_loss": 0.7822389006614685,
      "eval_runtime": 14.2177,
      "eval_samples_per_second": 39.95,
      "eval_steps_per_second": 19.975,
      "step": 2850
    },
    {
      "epoch": 4.304210721572992,
      "grad_norm": 0.1753046214580536,
      "learning_rate": 5.665199789862907e-07,
      "loss": 0.7241,
      "step": 2900
    },
    {
      "epoch": 4.378408458542014,
      "grad_norm": 0.19707240164279938,
      "learning_rate": 1.4173043232380557e-07,
      "loss": 0.7269,
      "step": 2950
    },
    {
      "epoch": 4.452606195511037,
      "grad_norm": 0.1791616678237915,
      "learning_rate": 0.0,
      "loss": 0.7119,
      "step": 3000
    },
    {
      "epoch": 4.452606195511037,
      "eval_loss": 0.7823454141616821,
      "eval_runtime": 14.0926,
      "eval_samples_per_second": 40.305,
      "eval_steps_per_second": 20.152,
      "step": 3000
    }
  ],
  "logging_steps": 50,
  "max_steps": 3000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 150,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 2
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.4201657147392e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}