{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.24353943985928833,
  "eval_steps": 500,
  "global_step": 900,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00027059937762143147,
      "grad_norm": 4.086390018463135,
      "learning_rate": 0.0,
      "loss": 3.2754,
      "step": 1
    },
    {
      "epoch": 0.0005411987552428629,
      "grad_norm": 3.758815288543701,
      "learning_rate": 9.017132551848513e-08,
      "loss": 3.2863,
      "step": 2
    },
    {
      "epoch": 0.0010823975104857259,
      "grad_norm": 3.8250608444213867,
      "learning_rate": 2.705139765554554e-07,
      "loss": 3.3425,
      "step": 4
    },
    {
      "epoch": 0.0016235962657285888,
      "grad_norm": 3.8092095851898193,
      "learning_rate": 4.5085662759242564e-07,
      "loss": 3.3165,
      "step": 6
    },
    {
      "epoch": 0.0021647950209714517,
      "grad_norm": 3.7621052265167236,
      "learning_rate": 6.311992786293959e-07,
      "loss": 3.3295,
      "step": 8
    },
    {
      "epoch": 0.002705993776214315,
      "grad_norm": 3.4136276245117188,
      "learning_rate": 8.115419296663661e-07,
      "loss": 3.3073,
      "step": 10
    },
    {
      "epoch": 0.0032471925314571776,
      "grad_norm": 2.855100393295288,
      "learning_rate": 9.918845807033363e-07,
      "loss": 3.3031,
      "step": 12
    },
    {
      "epoch": 0.0037883912867000408,
      "grad_norm": 2.491767406463623,
      "learning_rate": 1.1722272317403068e-06,
      "loss": 3.2943,
      "step": 14
    },
    {
      "epoch": 0.0043295900419429035,
      "grad_norm": 2.359778642654419,
      "learning_rate": 1.3525698827772768e-06,
      "loss": 3.2622,
      "step": 16
    },
    {
      "epoch": 0.004870788797185766,
      "grad_norm": 2.037504196166992,
      "learning_rate": 1.5329125338142473e-06,
      "loss": 3.239,
      "step": 18
    },
    {
      "epoch": 0.00541198755242863,
      "grad_norm": 2.8542497158050537,
      "learning_rate": 1.7132551848512173e-06,
      "loss": 3.2031,
      "step": 20
    },
    {
      "epoch": 0.0059531863076714925,
      "grad_norm": 2.297046661376953,
      "learning_rate": 1.8935978358881876e-06,
      "loss": 3.1721,
      "step": 22
    },
    {
      "epoch": 0.006494385062914355,
      "grad_norm": 2.2149112224578857,
      "learning_rate": 2.0739404869251576e-06,
      "loss": 3.121,
      "step": 24
    },
    {
      "epoch": 0.007035583818157218,
      "grad_norm": 1.8048591613769531,
      "learning_rate": 2.254283137962128e-06,
      "loss": 3.0857,
      "step": 26
    },
    {
      "epoch": 0.0075767825734000815,
      "grad_norm": 1.7466434240341187,
      "learning_rate": 2.4346257889990986e-06,
      "loss": 3.0489,
      "step": 28
    },
    {
      "epoch": 0.008117981328642944,
      "grad_norm": 2.1722524166107178,
      "learning_rate": 2.6149684400360686e-06,
      "loss": 3.0016,
      "step": 30
    },
    {
      "epoch": 0.008659180083885807,
      "grad_norm": 1.364578366279602,
      "learning_rate": 2.7953110910730386e-06,
      "loss": 2.9587,
      "step": 32
    },
    {
      "epoch": 0.00920037883912867,
      "grad_norm": 1.5823427438735962,
      "learning_rate": 2.9756537421100095e-06,
      "loss": 2.931,
      "step": 34
    },
    {
      "epoch": 0.009741577594371532,
      "grad_norm": 1.2367908954620361,
      "learning_rate": 3.1559963931469796e-06,
      "loss": 2.8953,
      "step": 36
    },
    {
      "epoch": 0.010282776349614395,
      "grad_norm": 1.0437366962432861,
      "learning_rate": 3.3363390441839496e-06,
      "loss": 2.8412,
      "step": 38
    },
    {
      "epoch": 0.01082397510485726,
      "grad_norm": 1.081803798675537,
      "learning_rate": 3.5166816952209197e-06,
      "loss": 2.7832,
      "step": 40
    },
    {
      "epoch": 0.011365173860100122,
      "grad_norm": 0.9715840220451355,
      "learning_rate": 3.69702434625789e-06,
      "loss": 2.7729,
      "step": 42
    },
    {
      "epoch": 0.011906372615342985,
      "grad_norm": 0.8603936433792114,
      "learning_rate": 3.877366997294861e-06,
      "loss": 2.6904,
      "step": 44
    },
    {
      "epoch": 0.012447571370585848,
      "grad_norm": 0.8236231803894043,
      "learning_rate": 4.057709648331831e-06,
      "loss": 2.6908,
      "step": 46
    },
    {
      "epoch": 0.01298877012582871,
      "grad_norm": 0.7681186199188232,
      "learning_rate": 4.2380522993688015e-06,
      "loss": 2.6212,
      "step": 48
    },
    {
      "epoch": 0.013529968881071573,
      "grad_norm": 0.8002827167510986,
      "learning_rate": 4.4183949504057716e-06,
      "loss": 2.6035,
      "step": 50
    },
    {
      "epoch": 0.014071167636314436,
      "grad_norm": 0.6757120490074158,
      "learning_rate": 4.598737601442742e-06,
      "loss": 2.595,
      "step": 52
    },
    {
      "epoch": 0.014612366391557299,
      "grad_norm": 0.6619369387626648,
      "learning_rate": 4.779080252479712e-06,
      "loss": 2.5522,
      "step": 54
    },
    {
      "epoch": 0.015153565146800163,
      "grad_norm": 0.6247105598449707,
      "learning_rate": 4.959422903516682e-06,
      "loss": 2.5079,
      "step": 56
    },
    {
      "epoch": 0.015694763902043024,
      "grad_norm": 0.6559263467788696,
      "learning_rate": 5.139765554553652e-06,
      "loss": 2.5009,
      "step": 58
    },
    {
      "epoch": 0.01623596265728589,
      "grad_norm": 0.6590877175331116,
      "learning_rate": 5.320108205590623e-06,
      "loss": 2.4648,
      "step": 60
    },
    {
      "epoch": 0.01677716141252875,
      "grad_norm": 0.6045516133308411,
      "learning_rate": 5.500450856627593e-06,
      "loss": 2.421,
      "step": 62
    },
    {
      "epoch": 0.017318360167771614,
      "grad_norm": 0.6533932089805603,
      "learning_rate": 5.680793507664563e-06,
      "loss": 2.3966,
      "step": 64
    },
    {
      "epoch": 0.01785955892301448,
      "grad_norm": 0.6478094458580017,
      "learning_rate": 5.861136158701533e-06,
      "loss": 2.3903,
      "step": 66
    },
    {
      "epoch": 0.01840075767825734,
      "grad_norm": 0.7349300980567932,
      "learning_rate": 6.041478809738504e-06,
      "loss": 2.3552,
      "step": 68
    },
    {
      "epoch": 0.018941956433500204,
      "grad_norm": 0.6454821825027466,
      "learning_rate": 6.221821460775474e-06,
      "loss": 2.3262,
      "step": 70
    },
    {
      "epoch": 0.019483155188743065,
      "grad_norm": 0.7321672439575195,
      "learning_rate": 6.402164111812444e-06,
      "loss": 2.3197,
      "step": 72
    },
    {
      "epoch": 0.02002435394398593,
      "grad_norm": 0.7664237022399902,
      "learning_rate": 6.582506762849414e-06,
      "loss": 2.2992,
      "step": 74
    },
    {
      "epoch": 0.02056555269922879,
      "grad_norm": 0.6843811869621277,
      "learning_rate": 6.762849413886384e-06,
      "loss": 2.2927,
      "step": 76
    },
    {
      "epoch": 0.021106751454471655,
      "grad_norm": 0.7199612259864807,
      "learning_rate": 6.9431920649233556e-06,
      "loss": 2.2525,
      "step": 78
    },
    {
      "epoch": 0.02164795020971452,
      "grad_norm": 0.778446614742279,
      "learning_rate": 7.123534715960326e-06,
      "loss": 2.2267,
      "step": 80
    },
    {
      "epoch": 0.02218914896495738,
      "grad_norm": 0.9287930727005005,
      "learning_rate": 7.303877366997296e-06,
      "loss": 2.2206,
      "step": 82
    },
    {
      "epoch": 0.022730347720200245,
      "grad_norm": 1.033782958984375,
      "learning_rate": 7.484220018034266e-06,
      "loss": 2.2063,
      "step": 84
    },
    {
      "epoch": 0.023271546475443106,
      "grad_norm": 1.0132615566253662,
      "learning_rate": 7.664562669071236e-06,
      "loss": 2.1677,
      "step": 86
    },
    {
      "epoch": 0.02381274523068597,
      "grad_norm": 0.9043529033660889,
      "learning_rate": 7.844905320108207e-06,
      "loss": 2.1696,
      "step": 88
    },
    {
      "epoch": 0.02435394398592883,
      "grad_norm": 0.6718290448188782,
      "learning_rate": 8.025247971145176e-06,
      "loss": 2.1492,
      "step": 90
    },
    {
      "epoch": 0.024895142741171696,
      "grad_norm": 0.9615944027900696,
      "learning_rate": 8.205590622182147e-06,
      "loss": 2.1452,
      "step": 92
    },
    {
      "epoch": 0.02543634149641456,
      "grad_norm": 0.9435996413230896,
      "learning_rate": 8.385933273219116e-06,
      "loss": 2.1098,
      "step": 94
    },
    {
      "epoch": 0.02597754025165742,
      "grad_norm": 0.7614261507987976,
      "learning_rate": 8.566275924256087e-06,
      "loss": 2.1286,
      "step": 96
    },
    {
      "epoch": 0.026518739006900285,
      "grad_norm": 0.9416339993476868,
      "learning_rate": 8.746618575293058e-06,
      "loss": 2.1092,
      "step": 98
    },
    {
      "epoch": 0.027059937762143146,
      "grad_norm": 0.9229443073272705,
      "learning_rate": 8.926961226330027e-06,
      "loss": 2.0932,
      "step": 100
    },
    {
      "epoch": 0.02760113651738601,
      "grad_norm": 0.7135593295097351,
      "learning_rate": 9.107303877366998e-06,
      "loss": 2.0699,
      "step": 102
    },
    {
      "epoch": 0.028142335272628872,
      "grad_norm": 1.0263723134994507,
      "learning_rate": 9.287646528403967e-06,
      "loss": 2.0445,
      "step": 104
    },
    {
      "epoch": 0.028683534027871736,
      "grad_norm": 1.0300300121307373,
      "learning_rate": 9.467989179440938e-06,
      "loss": 2.0463,
      "step": 106
    },
    {
      "epoch": 0.029224732783114597,
      "grad_norm": 0.8331286311149597,
      "learning_rate": 9.648331830477909e-06,
      "loss": 2.0381,
      "step": 108
    },
    {
      "epoch": 0.02976593153835746,
      "grad_norm": 0.7501435875892639,
      "learning_rate": 9.828674481514878e-06,
      "loss": 2.0411,
      "step": 110
    },
    {
      "epoch": 0.030307130293600326,
      "grad_norm": 0.6895191073417664,
      "learning_rate": 1.0009017132551849e-05,
      "loss": 2.0475,
      "step": 112
    },
    {
      "epoch": 0.030848329048843187,
      "grad_norm": 0.95854252576828,
      "learning_rate": 1.018935978358882e-05,
      "loss": 2.0071,
      "step": 114
    },
    {
      "epoch": 0.03138952780408605,
      "grad_norm": 1.1303929090499878,
      "learning_rate": 1.036970243462579e-05,
      "loss": 2.0008,
      "step": 116
    },
    {
      "epoch": 0.031930726559328916,
      "grad_norm": 0.7708876729011536,
      "learning_rate": 1.055004508566276e-05,
      "loss": 2.0061,
      "step": 118
    },
    {
      "epoch": 0.03247192531457178,
      "grad_norm": 0.9773860573768616,
      "learning_rate": 1.073038773669973e-05,
      "loss": 2.0096,
      "step": 120
    },
    {
      "epoch": 0.03301312406981464,
      "grad_norm": 1.118385910987854,
      "learning_rate": 1.09107303877367e-05,
      "loss": 1.9939,
      "step": 122
    },
    {
      "epoch": 0.0335543228250575,
      "grad_norm": 0.7215014696121216,
      "learning_rate": 1.109107303877367e-05,
      "loss": 1.9515,
      "step": 124
    },
    {
      "epoch": 0.03409552158030037,
      "grad_norm": 0.9696834683418274,
      "learning_rate": 1.1271415689810642e-05,
      "loss": 1.9639,
      "step": 126
    },
    {
      "epoch": 0.03463672033554323,
      "grad_norm": 0.945482611656189,
      "learning_rate": 1.1451758340847611e-05,
      "loss": 1.9397,
      "step": 128
    },
    {
      "epoch": 0.03517791909078609,
      "grad_norm": 0.7454535365104675,
      "learning_rate": 1.1632100991884582e-05,
      "loss": 1.9353,
      "step": 130
    },
    {
      "epoch": 0.03571911784602896,
      "grad_norm": 0.7824187278747559,
      "learning_rate": 1.1812443642921551e-05,
      "loss": 1.9227,
      "step": 132
    },
    {
      "epoch": 0.03626031660127182,
      "grad_norm": 0.7939879894256592,
      "learning_rate": 1.1992786293958522e-05,
      "loss": 1.9126,
      "step": 134
    },
    {
      "epoch": 0.03680151535651468,
      "grad_norm": 0.7776147723197937,
      "learning_rate": 1.2173128944995491e-05,
      "loss": 1.9002,
      "step": 136
    },
    {
      "epoch": 0.03734271411175754,
      "grad_norm": 0.6580236554145813,
      "learning_rate": 1.2353471596032462e-05,
      "loss": 1.9121,
      "step": 138
    },
    {
      "epoch": 0.03788391286700041,
      "grad_norm": 0.7200301289558411,
      "learning_rate": 1.2533814247069433e-05,
      "loss": 1.8885,
      "step": 140
    },
    {
      "epoch": 0.03842511162224327,
      "grad_norm": 0.7958497405052185,
      "learning_rate": 1.2714156898106402e-05,
      "loss": 1.9095,
      "step": 142
    },
    {
      "epoch": 0.03896631037748613,
      "grad_norm": 0.9120681881904602,
      "learning_rate": 1.2894499549143375e-05,
      "loss": 1.884,
      "step": 144
    },
    {
      "epoch": 0.039507509132729,
      "grad_norm": 0.8108247518539429,
      "learning_rate": 1.3074842200180342e-05,
      "loss": 1.8656,
      "step": 146
    },
    {
      "epoch": 0.04004870788797186,
      "grad_norm": 0.7010449171066284,
      "learning_rate": 1.3255184851217315e-05,
      "loss": 1.8635,
      "step": 148
    },
    {
      "epoch": 0.04058990664321472,
      "grad_norm": 0.8178524374961853,
      "learning_rate": 1.3435527502254284e-05,
      "loss": 1.8933,
      "step": 150
    },
    {
      "epoch": 0.04113110539845758,
      "grad_norm": 1.0447405576705933,
      "learning_rate": 1.3615870153291255e-05,
      "loss": 1.8523,
      "step": 152
    },
    {
      "epoch": 0.04167230415370045,
      "grad_norm": 0.8516271710395813,
      "learning_rate": 1.3796212804328224e-05,
      "loss": 1.8528,
      "step": 154
    },
    {
      "epoch": 0.04221350290894331,
      "grad_norm": 0.8437328934669495,
      "learning_rate": 1.3976555455365195e-05,
      "loss": 1.861,
      "step": 156
    },
    {
      "epoch": 0.04275470166418617,
      "grad_norm": 0.851265549659729,
      "learning_rate": 1.4156898106402164e-05,
      "loss": 1.8315,
      "step": 158
    },
    {
      "epoch": 0.04329590041942904,
      "grad_norm": 0.7337156534194946,
      "learning_rate": 1.4337240757439135e-05,
      "loss": 1.8354,
      "step": 160
    },
    {
      "epoch": 0.0438370991746719,
      "grad_norm": 0.9754143357276917,
      "learning_rate": 1.4517583408476104e-05,
      "loss": 1.8252,
      "step": 162
    },
    {
      "epoch": 0.04437829792991476,
      "grad_norm": 0.6172115802764893,
      "learning_rate": 1.4697926059513075e-05,
      "loss": 1.8094,
      "step": 164
    },
    {
      "epoch": 0.04491949668515762,
      "grad_norm": 0.8304158449172974,
      "learning_rate": 1.4878268710550044e-05,
      "loss": 1.8078,
      "step": 166
    },
    {
      "epoch": 0.04546069544040049,
      "grad_norm": 0.6388853788375854,
      "learning_rate": 1.5058611361587017e-05,
      "loss": 1.8106,
      "step": 168
    },
    {
      "epoch": 0.04600189419564335,
      "grad_norm": 0.743231475353241,
      "learning_rate": 1.5238954012623984e-05,
      "loss": 1.8144,
      "step": 170
    },
    {
      "epoch": 0.04654309295088621,
      "grad_norm": 0.6442289352416992,
      "learning_rate": 1.5419296663660955e-05,
      "loss": 1.7831,
      "step": 172
    },
    {
      "epoch": 0.04708429170612908,
      "grad_norm": 0.6877187490463257,
      "learning_rate": 1.559963931469793e-05,
      "loss": 1.8043,
      "step": 174
    },
    {
      "epoch": 0.04762549046137194,
      "grad_norm": 0.9389640688896179,
      "learning_rate": 1.5779981965734897e-05,
      "loss": 1.7869,
      "step": 176
    },
    {
      "epoch": 0.0481666892166148,
      "grad_norm": 1.0456589460372925,
      "learning_rate": 1.5960324616771868e-05,
      "loss": 1.7681,
      "step": 178
    },
    {
      "epoch": 0.04870788797185766,
      "grad_norm": 0.9617791175842285,
      "learning_rate": 1.614066726780884e-05,
      "loss": 1.7668,
      "step": 180
    },
    {
      "epoch": 0.04924908672710053,
      "grad_norm": 0.9334360361099243,
      "learning_rate": 1.632100991884581e-05,
      "loss": 1.7893,
      "step": 182
    },
    {
      "epoch": 0.04979028548234339,
      "grad_norm": 0.8952531814575195,
      "learning_rate": 1.6501352569882777e-05,
      "loss": 1.7758,
      "step": 184
    },
    {
      "epoch": 0.05033148423758625,
      "grad_norm": 0.8544924855232239,
      "learning_rate": 1.6681695220919748e-05,
      "loss": 1.793,
      "step": 186
    },
    {
      "epoch": 0.05087268299282912,
      "grad_norm": 0.7782765030860901,
      "learning_rate": 1.686203787195672e-05,
      "loss": 1.768,
      "step": 188
    },
    {
      "epoch": 0.05141388174807198,
      "grad_norm": 0.7119695544242859,
      "learning_rate": 1.704238052299369e-05,
      "loss": 1.7685,
      "step": 190
    },
    {
      "epoch": 0.05195508050331484,
      "grad_norm": 0.9119647145271301,
      "learning_rate": 1.7222723174030657e-05,
      "loss": 1.7706,
      "step": 192
    },
    {
      "epoch": 0.0524962792585577,
      "grad_norm": 0.6414957642555237,
      "learning_rate": 1.7403065825067628e-05,
      "loss": 1.7626,
      "step": 194
    },
    {
      "epoch": 0.05303747801380057,
      "grad_norm": 0.8069677352905273,
      "learning_rate": 1.75834084761046e-05,
      "loss": 1.7423,
      "step": 196
    },
    {
      "epoch": 0.05357867676904343,
      "grad_norm": 0.6549937725067139,
      "learning_rate": 1.776375112714157e-05,
      "loss": 1.7428,
      "step": 198
    },
    {
      "epoch": 0.05411987552428629,
      "grad_norm": 0.8064024448394775,
      "learning_rate": 1.7944093778178538e-05,
      "loss": 1.7448,
      "step": 200
    },
    {
      "epoch": 0.054661074279529154,
      "grad_norm": 0.7182701826095581,
      "learning_rate": 1.8124436429215512e-05,
      "loss": 1.7248,
      "step": 202
    },
    {
      "epoch": 0.05520227303477202,
      "grad_norm": 0.6997919678688049,
      "learning_rate": 1.830477908025248e-05,
      "loss": 1.7281,
      "step": 204
    },
    {
      "epoch": 0.05574347179001488,
      "grad_norm": 0.7071277499198914,
      "learning_rate": 1.848512173128945e-05,
      "loss": 1.714,
      "step": 206
    },
    {
      "epoch": 0.056284670545257744,
      "grad_norm": 0.6344273090362549,
      "learning_rate": 1.866546438232642e-05,
      "loss": 1.7463,
      "step": 208
    },
    {
      "epoch": 0.05682586930050061,
      "grad_norm": 0.7192733883857727,
      "learning_rate": 1.8845807033363392e-05,
      "loss": 1.737,
      "step": 210
    },
    {
      "epoch": 0.05736706805574347,
      "grad_norm": 0.7418521642684937,
      "learning_rate": 1.9026149684400363e-05,
      "loss": 1.7197,
      "step": 212
    },
    {
      "epoch": 0.057908266810986334,
      "grad_norm": 0.875845730304718,
      "learning_rate": 1.920649233543733e-05,
      "loss": 1.6968,
      "step": 214
    },
    {
      "epoch": 0.058449465566229195,
      "grad_norm": 0.7394037842750549,
      "learning_rate": 1.9386834986474305e-05,
      "loss": 1.7051,
      "step": 216
    },
    {
      "epoch": 0.05899066432147206,
      "grad_norm": 0.6689572930335999,
      "learning_rate": 1.9567177637511272e-05,
      "loss": 1.7152,
      "step": 218
    },
    {
      "epoch": 0.05953186307671492,
      "grad_norm": 0.7955539226531982,
      "learning_rate": 1.9747520288548243e-05,
      "loss": 1.7136,
      "step": 220
    },
    {
      "epoch": 0.060073061831957784,
      "grad_norm": 0.7005388140678406,
      "learning_rate": 1.9927862939585214e-05,
      "loss": 1.7152,
      "step": 222
    },
    {
      "epoch": 0.06061426058720065,
      "grad_norm": 0.6205731630325317,
      "learning_rate": 2.0108205590622185e-05,
      "loss": 1.6901,
      "step": 224
    },
    {
      "epoch": 0.06115545934244351,
      "grad_norm": 0.7079929709434509,
      "learning_rate": 2.0288548241659152e-05,
      "loss": 1.6905,
      "step": 226
    },
    {
      "epoch": 0.061696658097686374,
      "grad_norm": 0.6871302723884583,
      "learning_rate": 2.0468890892696123e-05,
      "loss": 1.6867,
      "step": 228
    },
    {
      "epoch": 0.062237856852929235,
      "grad_norm": 0.7172162532806396,
      "learning_rate": 2.0649233543733094e-05,
      "loss": 1.685,
      "step": 230
    },
    {
      "epoch": 0.0627790556081721,
      "grad_norm": 0.6729004979133606,
      "learning_rate": 2.0829576194770065e-05,
      "loss": 1.6961,
      "step": 232
    },
    {
      "epoch": 0.06332025436341496,
      "grad_norm": 0.7335099577903748,
      "learning_rate": 2.1009918845807033e-05,
      "loss": 1.6797,
      "step": 234
    },
    {
      "epoch": 0.06386145311865783,
      "grad_norm": 0.6398060321807861,
      "learning_rate": 2.1190261496844003e-05,
      "loss": 1.7037,
      "step": 236
    },
    {
      "epoch": 0.0644026518739007,
      "grad_norm": 0.7026365399360657,
      "learning_rate": 2.1370604147880974e-05,
      "loss": 1.6698,
      "step": 238
    },
    {
      "epoch": 0.06494385062914355,
      "grad_norm": 0.7972332239151001,
      "learning_rate": 2.1550946798917945e-05,
      "loss": 1.6866,
      "step": 240
    },
    {
      "epoch": 0.06548504938438642,
      "grad_norm": 0.7363021969795227,
      "learning_rate": 2.1731289449954913e-05,
      "loss": 1.6879,
      "step": 242
    },
    {
      "epoch": 0.06602624813962928,
      "grad_norm": 0.7071017026901245,
      "learning_rate": 2.1911632100991887e-05,
      "loss": 1.6898,
      "step": 244
    },
    {
      "epoch": 0.06656744689487214,
      "grad_norm": 0.8030880093574524,
      "learning_rate": 2.2091974752028858e-05,
      "loss": 1.6734,
      "step": 246
    },
    {
      "epoch": 0.067108645650115,
      "grad_norm": 0.7429569363594055,
      "learning_rate": 2.2272317403065825e-05,
      "loss": 1.6722,
      "step": 248
    },
    {
      "epoch": 0.06764984440535787,
      "grad_norm": 0.6807804107666016,
      "learning_rate": 2.2452660054102796e-05,
      "loss": 1.6697,
      "step": 250
    },
    {
      "epoch": 0.06819104316060073,
      "grad_norm": 0.6632562875747681,
      "learning_rate": 2.2633002705139767e-05,
      "loss": 1.6453,
      "step": 252
    },
    {
      "epoch": 0.0687322419158436,
      "grad_norm": 0.6661680340766907,
      "learning_rate": 2.2813345356176738e-05,
      "loss": 1.6701,
      "step": 254
    },
    {
      "epoch": 0.06927344067108646,
      "grad_norm": 0.6747105121612549,
      "learning_rate": 2.2993688007213706e-05,
      "loss": 1.6729,
      "step": 256
    },
    {
      "epoch": 0.06981463942632932,
      "grad_norm": 0.7698473334312439,
      "learning_rate": 2.317403065825068e-05,
      "loss": 1.6528,
      "step": 258
    },
    {
      "epoch": 0.07035583818157218,
      "grad_norm": 0.6111325621604919,
      "learning_rate": 2.3354373309287647e-05,
      "loss": 1.6412,
      "step": 260
    },
    {
      "epoch": 0.07089703693681504,
      "grad_norm": 0.7405019998550415,
      "learning_rate": 2.3534715960324618e-05,
      "loss": 1.6564,
      "step": 262
    },
    {
      "epoch": 0.07143823569205791,
      "grad_norm": 0.6702501773834229,
      "learning_rate": 2.371505861136159e-05,
      "loss": 1.654,
      "step": 264
    },
    {
      "epoch": 0.07197943444730077,
      "grad_norm": 0.7076373100280762,
      "learning_rate": 2.389540126239856e-05,
      "loss": 1.6301,
      "step": 266
    },
    {
      "epoch": 0.07252063320254364,
      "grad_norm": 0.7239627242088318,
      "learning_rate": 2.4075743913435528e-05,
      "loss": 1.6575,
      "step": 268
    },
    {
      "epoch": 0.0730618319577865,
      "grad_norm": 0.753480076789856,
      "learning_rate": 2.42560865644725e-05,
      "loss": 1.6603,
      "step": 270
    },
    {
      "epoch": 0.07360303071302936,
      "grad_norm": 0.7261641025543213,
      "learning_rate": 2.443642921550947e-05,
      "loss": 1.6449,
      "step": 272
    },
    {
      "epoch": 0.07414422946827222,
      "grad_norm": 0.6315119862556458,
      "learning_rate": 2.461677186654644e-05,
      "loss": 1.6538,
      "step": 274
    },
    {
      "epoch": 0.07468542822351508,
      "grad_norm": 0.5698412656784058,
      "learning_rate": 2.4797114517583408e-05,
      "loss": 1.6663,
      "step": 276
    },
    {
      "epoch": 0.07522662697875795,
      "grad_norm": 0.5968983173370361,
      "learning_rate": 2.497745716862038e-05,
      "loss": 1.643,
      "step": 278
    },
    {
      "epoch": 0.07576782573400082,
      "grad_norm": 0.561126172542572,
      "learning_rate": 2.5157799819657353e-05,
      "loss": 1.6301,
      "step": 280
    },
    {
      "epoch": 0.07630902448924368,
      "grad_norm": 0.7290865778923035,
      "learning_rate": 2.533814247069432e-05,
      "loss": 1.6412,
      "step": 282
    },
    {
      "epoch": 0.07685022324448654,
      "grad_norm": 0.7629122138023376,
      "learning_rate": 2.5518485121731288e-05,
      "loss": 1.6335,
      "step": 284
    },
    {
      "epoch": 0.0773914219997294,
      "grad_norm": 0.5383496284484863,
      "learning_rate": 2.5698827772768262e-05,
      "loss": 1.6226,
      "step": 286
    },
    {
      "epoch": 0.07793262075497226,
      "grad_norm": 0.7778373956680298,
      "learning_rate": 2.5879170423805233e-05,
      "loss": 1.6333,
      "step": 288
    },
    {
      "epoch": 0.07847381951021512,
      "grad_norm": 0.6851366758346558,
      "learning_rate": 2.60595130748422e-05,
      "loss": 1.6251,
      "step": 290
    },
    {
      "epoch": 0.079015018265458,
      "grad_norm": 0.5947225689888,
      "learning_rate": 2.623985572587917e-05,
      "loss": 1.6298,
      "step": 292
    },
    {
      "epoch": 0.07955621702070086,
      "grad_norm": 0.9742544889450073,
      "learning_rate": 2.6420198376916146e-05,
      "loss": 1.6252,
      "step": 294
    },
    {
      "epoch": 0.08009741577594372,
      "grad_norm": 1.2064323425292969,
      "learning_rate": 2.6600541027953113e-05,
      "loss": 1.6152,
      "step": 296
    },
    {
      "epoch": 0.08063861453118658,
      "grad_norm": 1.0506716966629028,
      "learning_rate": 2.678088367899008e-05,
      "loss": 1.6351,
      "step": 298
    },
    {
      "epoch": 0.08117981328642944,
      "grad_norm": 1.2992738485336304,
      "learning_rate": 2.696122633002705e-05,
      "loss": 1.6193,
      "step": 300
    },
    {
      "epoch": 0.0817210120416723,
      "grad_norm": 1.0616599321365356,
      "learning_rate": 2.7141568981064026e-05,
      "loss": 1.6135,
      "step": 302
    },
    {
      "epoch": 0.08226221079691516,
      "grad_norm": 1.037997841835022,
      "learning_rate": 2.7321911632100993e-05,
      "loss": 1.6344,
      "step": 304
    },
    {
      "epoch": 0.08280340955215804,
      "grad_norm": 0.8937569856643677,
      "learning_rate": 2.7502254283137964e-05,
      "loss": 1.6077,
      "step": 306
    },
    {
      "epoch": 0.0833446083074009,
      "grad_norm": 1.1334234476089478,
      "learning_rate": 2.7682596934174932e-05,
      "loss": 1.6193,
      "step": 308
    },
    {
      "epoch": 0.08388580706264376,
      "grad_norm": 0.8336219191551208,
      "learning_rate": 2.7862939585211906e-05,
      "loss": 1.5948,
      "step": 310
    },
    {
      "epoch": 0.08442700581788662,
      "grad_norm": 1.1825398206710815,
      "learning_rate": 2.8043282236248874e-05,
      "loss": 1.6239,
      "step": 312
    },
    {
      "epoch": 0.08496820457312948,
      "grad_norm": 0.7945433259010315,
      "learning_rate": 2.8223624887285844e-05,
      "loss": 1.6119,
      "step": 314
    },
    {
      "epoch": 0.08550940332837234,
      "grad_norm": 0.6971009969711304,
      "learning_rate": 2.8403967538322812e-05,
      "loss": 1.5822,
      "step": 316
    },
    {
      "epoch": 0.0860506020836152,
      "grad_norm": 0.6050766706466675,
      "learning_rate": 2.8584310189359786e-05,
      "loss": 1.6161,
      "step": 318
    },
    {
      "epoch": 0.08659180083885808,
      "grad_norm": 0.6123189330101013,
      "learning_rate": 2.8764652840396754e-05,
      "loss": 1.5941,
      "step": 320
    },
    {
      "epoch": 0.08713299959410094,
      "grad_norm": 0.5471253395080566,
      "learning_rate": 2.8944995491433725e-05,
      "loss": 1.603,
      "step": 322
    },
    {
      "epoch": 0.0876741983493438,
      "grad_norm": 0.5793882608413696,
      "learning_rate": 2.91253381424707e-05,
      "loss": 1.6076,
      "step": 324
    },
    {
      "epoch": 0.08821539710458666,
      "grad_norm": 0.5409413576126099,
      "learning_rate": 2.9305680793507666e-05,
      "loss": 1.5825,
      "step": 326
    },
    {
      "epoch": 0.08875659585982952,
      "grad_norm": 6.757148265838623,
      "learning_rate": 2.9486023444544637e-05,
      "loss": 1.5942,
      "step": 328
    },
    {
      "epoch": 0.08929779461507238,
      "grad_norm": 1.3357856273651123,
      "learning_rate": 2.9666366095581605e-05,
      "loss": 1.642,
      "step": 330
    },
    {
      "epoch": 0.08983899337031524,
      "grad_norm": 0.8245829939842224,
      "learning_rate": 2.984670874661858e-05,
      "loss": 1.6062,
      "step": 332
    },
    {
      "epoch": 0.09038019212555812,
      "grad_norm": 0.8888993263244629,
      "learning_rate": 3.0027051397655547e-05,
      "loss": 1.5952,
      "step": 334
    },
    {
      "epoch": 0.09092139088080098,
      "grad_norm": 0.8923915028572083,
      "learning_rate": 3.0207394048692517e-05,
      "loss": 1.5977,
      "step": 336
    },
    {
      "epoch": 0.09146258963604384,
      "grad_norm": 0.7443459033966064,
      "learning_rate": 3.0387736699729485e-05,
      "loss": 1.5738,
      "step": 338
    },
    {
      "epoch": 0.0920037883912867,
      "grad_norm": 0.7297430038452148,
      "learning_rate": 3.056807935076646e-05,
      "loss": 1.5907,
      "step": 340
    },
    {
      "epoch": 0.09254498714652956,
      "grad_norm": 0.6882812976837158,
      "learning_rate": 3.074842200180343e-05,
      "loss": 1.5767,
      "step": 342
    },
    {
      "epoch": 0.09308618590177242,
      "grad_norm": 0.6150392889976501,
      "learning_rate": 3.0928764652840394e-05,
      "loss": 1.5747,
      "step": 344
    },
    {
      "epoch": 0.09362738465701528,
      "grad_norm": 0.6230599284172058,
      "learning_rate": 3.110910730387737e-05,
      "loss": 1.583,
      "step": 346
    },
    {
      "epoch": 0.09416858341225816,
      "grad_norm": 0.6081874966621399,
      "learning_rate": 3.128944995491434e-05,
      "loss": 1.5875,
      "step": 348
    },
    {
      "epoch": 0.09470978216750102,
      "grad_norm": 0.5467821955680847,
      "learning_rate": 3.146979260595131e-05,
      "loss": 1.575,
      "step": 350
    },
    {
      "epoch": 0.09525098092274388,
      "grad_norm": 0.5629361271858215,
      "learning_rate": 3.165013525698828e-05,
      "loss": 1.5828,
      "step": 352
    },
    {
      "epoch": 0.09579217967798674,
      "grad_norm": 0.5995283126831055,
      "learning_rate": 3.1830477908025245e-05,
      "loss": 1.5872,
      "step": 354
    },
    {
      "epoch": 0.0963333784332296,
      "grad_norm": 0.556450366973877,
      "learning_rate": 3.201082055906222e-05,
      "loss": 1.553,
      "step": 356
    },
    {
      "epoch": 0.09687457718847246,
      "grad_norm": 0.6498537063598633,
      "learning_rate": 3.219116321009919e-05,
      "loss": 1.5667,
      "step": 358
    },
    {
      "epoch": 0.09741577594371532,
      "grad_norm": 0.5891172885894775,
      "learning_rate": 3.237150586113616e-05,
      "loss": 1.5818,
      "step": 360
    },
    {
      "epoch": 0.0979569746989582,
      "grad_norm": 0.6487797498703003,
      "learning_rate": 3.2551848512173136e-05,
      "loss": 1.5582,
      "step": 362
    },
    {
      "epoch": 0.09849817345420106,
      "grad_norm": 0.5860658884048462,
      "learning_rate": 3.27321911632101e-05,
      "loss": 1.5725,
      "step": 364
    },
    {
      "epoch": 0.09903937220944392,
      "grad_norm": 0.5619581937789917,
      "learning_rate": 3.291253381424707e-05,
      "loss": 1.5779,
      "step": 366
    },
    {
      "epoch": 0.09958057096468678,
      "grad_norm": 0.7147429585456848,
      "learning_rate": 3.309287646528404e-05,
      "loss": 1.5766,
      "step": 368
    },
    {
      "epoch": 0.10012176971992964,
      "grad_norm": 0.5840562582015991,
      "learning_rate": 3.327321911632101e-05,
      "loss": 1.5609,
      "step": 370
    },
    {
      "epoch": 0.1006629684751725,
      "grad_norm": 0.6277860403060913,
      "learning_rate": 3.345356176735798e-05,
      "loss": 1.5645,
      "step": 372
    },
    {
      "epoch": 0.10120416723041536,
      "grad_norm": 0.6395567655563354,
      "learning_rate": 3.3633904418394954e-05,
      "loss": 1.545,
      "step": 374
    },
    {
      "epoch": 0.10174536598565824,
      "grad_norm": 0.6651553511619568,
      "learning_rate": 3.381424706943192e-05,
      "loss": 1.5643,
      "step": 376
    },
    {
      "epoch": 0.1022865647409011,
      "grad_norm": 0.6691033244132996,
      "learning_rate": 3.3994589720468896e-05,
      "loss": 1.5705,
      "step": 378
    },
    {
      "epoch": 0.10282776349614396,
      "grad_norm": 0.5426511764526367,
      "learning_rate": 3.4174932371505863e-05,
      "loss": 1.536,
      "step": 380
    },
    {
      "epoch": 0.10336896225138682,
      "grad_norm": 0.6677694916725159,
      "learning_rate": 3.435527502254283e-05,
      "loss": 1.5664,
      "step": 382
    },
    {
      "epoch": 0.10391016100662968,
      "grad_norm": 0.5283762216567993,
      "learning_rate": 3.45356176735798e-05,
      "loss": 1.5474,
      "step": 384
    },
    {
      "epoch": 0.10445135976187254,
      "grad_norm": 0.652812659740448,
      "learning_rate": 3.471596032461677e-05,
      "loss": 1.5509,
      "step": 386
    },
    {
      "epoch": 0.1049925585171154,
      "grad_norm": 0.8639987111091614,
      "learning_rate": 3.489630297565375e-05,
      "loss": 1.5563,
      "step": 388
    },
    {
      "epoch": 0.10553375727235827,
      "grad_norm": 0.7726946473121643,
      "learning_rate": 3.5076645626690715e-05,
      "loss": 1.5682,
      "step": 390
    },
    {
      "epoch": 0.10607495602760114,
      "grad_norm": 0.6511155962944031,
      "learning_rate": 3.525698827772768e-05,
      "loss": 1.5571,
      "step": 392
    },
    {
      "epoch": 0.106616154782844,
      "grad_norm": 0.6578395962715149,
      "learning_rate": 3.5437330928764656e-05,
      "loss": 1.5452,
      "step": 394
    },
    {
      "epoch": 0.10715735353808686,
      "grad_norm": 0.642919659614563,
      "learning_rate": 3.5617673579801624e-05,
      "loss": 1.5508,
      "step": 396
    },
    {
      "epoch": 0.10769855229332972,
      "grad_norm": 0.5190348029136658,
      "learning_rate": 3.579801623083859e-05,
      "loss": 1.5432,
      "step": 398
    },
    {
      "epoch": 0.10823975104857259,
      "grad_norm": 0.48932549357414246,
      "learning_rate": 3.5978358881875566e-05,
      "loss": 1.5544,
      "step": 400
    },
    {
      "epoch": 0.10878094980381545,
      "grad_norm": 0.5018340945243835,
      "learning_rate": 3.615870153291254e-05,
      "loss": 1.5322,
      "step": 402
    },
    {
      "epoch": 0.10932214855905831,
      "grad_norm": 0.5701499581336975,
      "learning_rate": 3.633904418394951e-05,
      "loss": 1.5288,
      "step": 404
    },
    {
      "epoch": 0.10986334731430118,
      "grad_norm": 0.6049205660820007,
      "learning_rate": 3.6519386834986475e-05,
      "loss": 1.5627,
      "step": 406
    },
    {
      "epoch": 0.11040454606954404,
      "grad_norm": 0.5781517028808594,
      "learning_rate": 3.669972948602345e-05,
      "loss": 1.542,
      "step": 408
    },
    {
      "epoch": 0.1109457448247869,
      "grad_norm": 0.5594660043716431,
      "learning_rate": 3.688007213706042e-05,
      "loss": 1.5461,
      "step": 410
    },
    {
      "epoch": 0.11148694358002977,
      "grad_norm": 0.5319619178771973,
      "learning_rate": 3.7060414788097384e-05,
      "loss": 1.5668,
      "step": 412
    },
    {
      "epoch": 0.11202814233527263,
      "grad_norm": 0.5311123728752136,
      "learning_rate": 3.724075743913435e-05,
      "loss": 1.528,
      "step": 414
    },
    {
      "epoch": 0.11256934109051549,
      "grad_norm": 0.5555101633071899,
      "learning_rate": 3.7421100090171326e-05,
      "loss": 1.5392,
      "step": 416
    },
    {
      "epoch": 0.11311053984575835,
      "grad_norm": 0.5486223101615906,
      "learning_rate": 3.76014427412083e-05,
      "loss": 1.5337,
      "step": 418
    },
    {
      "epoch": 0.11365173860100122,
      "grad_norm": 0.5156669020652771,
      "learning_rate": 3.778178539224527e-05,
      "loss": 1.5105,
      "step": 420
    },
    {
      "epoch": 0.11419293735624408,
      "grad_norm": 0.49596554040908813,
      "learning_rate": 3.7962128043282235e-05,
      "loss": 1.515,
      "step": 422
    },
    {
      "epoch": 0.11473413611148695,
      "grad_norm": 0.641333281993866,
      "learning_rate": 3.814247069431921e-05,
      "loss": 1.5328,
      "step": 424
    },
    {
      "epoch": 0.1152753348667298,
      "grad_norm": 0.6106113195419312,
      "learning_rate": 3.832281334535618e-05,
      "loss": 1.5189,
      "step": 426
    },
    {
      "epoch": 0.11581653362197267,
      "grad_norm": 0.5619134306907654,
      "learning_rate": 3.8503155996393145e-05,
      "loss": 1.5295,
      "step": 428
    },
    {
      "epoch": 0.11635773237721553,
      "grad_norm": 0.5396978259086609,
      "learning_rate": 3.868349864743012e-05,
      "loss": 1.5173,
      "step": 430
    },
    {
      "epoch": 0.11689893113245839,
      "grad_norm": 0.5466894507408142,
      "learning_rate": 3.886384129846709e-05,
      "loss": 1.5191,
      "step": 432
    },
    {
      "epoch": 0.11744012988770126,
      "grad_norm": 0.5601218342781067,
      "learning_rate": 3.904418394950406e-05,
      "loss": 1.5285,
      "step": 434
    },
    {
      "epoch": 0.11798132864294412,
      "grad_norm": 0.6620492935180664,
      "learning_rate": 3.922452660054103e-05,
      "loss": 1.4946,
      "step": 436
    },
    {
      "epoch": 0.11852252739818699,
      "grad_norm": 0.49140048027038574,
      "learning_rate": 3.9404869251578e-05,
      "loss": 1.512,
      "step": 438
    },
    {
      "epoch": 0.11906372615342985,
      "grad_norm": 0.5824118256568909,
      "learning_rate": 3.958521190261497e-05,
      "loss": 1.5244,
      "step": 440
    },
    {
      "epoch": 0.11960492490867271,
      "grad_norm": 0.4967150092124939,
      "learning_rate": 3.976555455365194e-05,
      "loss": 1.5273,
      "step": 442
    },
    {
      "epoch": 0.12014612366391557,
      "grad_norm": 0.5089767575263977,
      "learning_rate": 3.994589720468891e-05,
      "loss": 1.5119,
      "step": 444
    },
    {
      "epoch": 0.12068732241915843,
      "grad_norm": 0.5404312014579773,
      "learning_rate": 4.0126239855725886e-05,
      "loss": 1.5072,
      "step": 446
    },
    {
      "epoch": 0.1212285211744013,
      "grad_norm": 0.5239550471305847,
      "learning_rate": 4.0306582506762853e-05,
      "loss": 1.5336,
      "step": 448
    },
    {
      "epoch": 0.12176971992964417,
      "grad_norm": 0.4974781274795532,
      "learning_rate": 4.048692515779982e-05,
      "loss": 1.5225,
      "step": 450
    },
    {
      "epoch": 0.12231091868488703,
      "grad_norm": 0.5363791584968567,
      "learning_rate": 4.066726780883679e-05,
      "loss": 1.5176,
      "step": 452
    },
    {
      "epoch": 0.12285211744012989,
      "grad_norm": 0.5095157027244568,
      "learning_rate": 4.084761045987376e-05,
      "loss": 1.4936,
      "step": 454
    },
    {
      "epoch": 0.12339331619537275,
      "grad_norm": 0.4920356869697571,
      "learning_rate": 4.102795311091073e-05,
      "loss": 1.5269,
      "step": 456
    },
    {
      "epoch": 0.12393451495061561,
      "grad_norm": 0.4940793514251709,
      "learning_rate": 4.1208295761947705e-05,
      "loss": 1.5072,
      "step": 458
    },
    {
      "epoch": 0.12447571370585847,
      "grad_norm": 0.4805227220058441,
      "learning_rate": 4.138863841298467e-05,
      "loss": 1.4987,
      "step": 460
    },
    {
      "epoch": 0.12501691246110133,
      "grad_norm": 0.49683934450149536,
      "learning_rate": 4.1568981064021646e-05,
      "loss": 1.5008,
      "step": 462
    },
    {
      "epoch": 0.1255581112163442,
      "grad_norm": 0.5283801555633545,
      "learning_rate": 4.1749323715058614e-05,
      "loss": 1.5177,
      "step": 464
    },
    {
      "epoch": 0.12609930997158705,
      "grad_norm": 0.5395119190216064,
      "learning_rate": 4.192966636609558e-05,
      "loss": 1.5106,
      "step": 466
    },
    {
      "epoch": 0.12664050872682991,
      "grad_norm": 0.5403693914413452,
      "learning_rate": 4.211000901713255e-05,
      "loss": 1.4854,
      "step": 468
    },
    {
      "epoch": 0.1271817074820728,
      "grad_norm": 0.4690951406955719,
      "learning_rate": 4.229035166816952e-05,
      "loss": 1.5079,
      "step": 470
    },
    {
      "epoch": 0.12772290623731566,
      "grad_norm": 0.5077293515205383,
      "learning_rate": 4.24706943192065e-05,
      "loss": 1.4953,
      "step": 472
    },
    {
      "epoch": 0.12826410499255853,
      "grad_norm": 0.440019816160202,
      "learning_rate": 4.2651036970243465e-05,
      "loss": 1.4864,
      "step": 474
    },
    {
      "epoch": 0.1288053037478014,
      "grad_norm": 0.48672759532928467,
      "learning_rate": 4.283137962128044e-05,
      "loss": 1.5205,
      "step": 476
    },
    {
      "epoch": 0.12934650250304425,
      "grad_norm": 0.4732811450958252,
      "learning_rate": 4.301172227231741e-05,
      "loss": 1.4998,
      "step": 478
    },
    {
      "epoch": 0.1298877012582871,
      "grad_norm": 0.46713048219680786,
      "learning_rate": 4.3192064923354374e-05,
      "loss": 1.4893,
      "step": 480
    },
    {
      "epoch": 0.13042890001352997,
      "grad_norm": 0.502356231212616,
      "learning_rate": 4.337240757439134e-05,
      "loss": 1.5125,
      "step": 482
    },
    {
      "epoch": 0.13097009876877283,
      "grad_norm": 0.45067864656448364,
      "learning_rate": 4.3552750225428316e-05,
      "loss": 1.4978,
      "step": 484
    },
    {
      "epoch": 0.1315112975240157,
      "grad_norm": 0.46964120864868164,
      "learning_rate": 4.373309287646529e-05,
      "loss": 1.5006,
      "step": 486
    },
    {
      "epoch": 0.13205249627925855,
      "grad_norm": 0.47723180055618286,
      "learning_rate": 4.391343552750226e-05,
      "loss": 1.513,
      "step": 488
    },
    {
      "epoch": 0.1325936950345014,
      "grad_norm": 0.5100542306900024,
      "learning_rate": 4.4093778178539225e-05,
      "loss": 1.5279,
      "step": 490
    },
    {
      "epoch": 0.13313489378974427,
      "grad_norm": 0.5344257354736328,
      "learning_rate": 4.42741208295762e-05,
      "loss": 1.5193,
      "step": 492
    },
    {
      "epoch": 0.13367609254498714,
      "grad_norm": 0.5867893695831299,
      "learning_rate": 4.445446348061317e-05,
      "loss": 1.512,
      "step": 494
    },
    {
      "epoch": 0.13421729130023,
      "grad_norm": 0.7811394929885864,
      "learning_rate": 4.4634806131650134e-05,
      "loss": 1.5038,
      "step": 496
    },
    {
      "epoch": 0.13475849005547288,
      "grad_norm": 0.8505339622497559,
      "learning_rate": 4.48151487826871e-05,
      "loss": 1.5169,
      "step": 498
    },
    {
      "epoch": 0.13529968881071575,
      "grad_norm": 0.6337641477584839,
      "learning_rate": 4.4995491433724076e-05,
      "loss": 1.4951,
      "step": 500
    },
    {
      "epoch": 0.1358408875659586,
      "grad_norm": 0.7979961633682251,
      "learning_rate": 4.517583408476105e-05,
      "loss": 1.5031,
      "step": 502
    },
    {
      "epoch": 0.13638208632120147,
      "grad_norm": 0.6946894526481628,
      "learning_rate": 4.535617673579802e-05,
      "loss": 1.501,
      "step": 504
    },
    {
      "epoch": 0.13692328507644433,
      "grad_norm": 0.6830259561538696,
      "learning_rate": 4.5536519386834986e-05,
      "loss": 1.4896,
      "step": 506
    },
    {
      "epoch": 0.1374644838316872,
      "grad_norm": 0.5908662676811218,
      "learning_rate": 4.571686203787196e-05,
      "loss": 1.4992,
      "step": 508
    },
    {
      "epoch": 0.13800568258693005,
      "grad_norm": 0.7655865550041199,
      "learning_rate": 4.589720468890893e-05,
      "loss": 1.4911,
      "step": 510
    },
    {
      "epoch": 0.1385468813421729,
      "grad_norm": 0.5924785733222961,
      "learning_rate": 4.6077547339945895e-05,
      "loss": 1.4719,
      "step": 512
    },
    {
      "epoch": 0.13908808009741577,
      "grad_norm": 0.6654263138771057,
      "learning_rate": 4.625788999098287e-05,
      "loss": 1.5109,
      "step": 514
    },
    {
      "epoch": 0.13962927885265863,
      "grad_norm": 0.5296297073364258,
      "learning_rate": 4.6438232642019843e-05,
      "loss": 1.4934,
      "step": 516
    },
    {
      "epoch": 0.1401704776079015,
      "grad_norm": 0.5698690414428711,
      "learning_rate": 4.661857529305681e-05,
      "loss": 1.4954,
      "step": 518
    },
    {
      "epoch": 0.14071167636314436,
      "grad_norm": 0.5790325403213501,
      "learning_rate": 4.679891794409378e-05,
      "loss": 1.4673,
      "step": 520
    },
    {
      "epoch": 0.14125287511838722,
      "grad_norm": 0.551480770111084,
      "learning_rate": 4.697926059513075e-05,
      "loss": 1.476,
      "step": 522
    },
    {
      "epoch": 0.14179407387363008,
      "grad_norm": 0.5201780796051025,
      "learning_rate": 4.715960324616772e-05,
      "loss": 1.4701,
      "step": 524
    },
    {
      "epoch": 0.14233527262887297,
      "grad_norm": 0.46442562341690063,
      "learning_rate": 4.733994589720469e-05,
      "loss": 1.4831,
      "step": 526
    },
    {
      "epoch": 0.14287647138411583,
      "grad_norm": 0.5558522939682007,
      "learning_rate": 4.752028854824166e-05,
      "loss": 1.4729,
      "step": 528
    },
    {
      "epoch": 0.1434176701393587,
      "grad_norm": 0.48511791229248047,
      "learning_rate": 4.7700631199278636e-05,
      "loss": 1.4742,
      "step": 530
    },
    {
      "epoch": 0.14395886889460155,
      "grad_norm": 0.5244829058647156,
      "learning_rate": 4.7880973850315604e-05,
      "loss": 1.4928,
      "step": 532
    },
    {
      "epoch": 0.1445000676498444,
      "grad_norm": 0.48878946900367737,
      "learning_rate": 4.806131650135257e-05,
      "loss": 1.4921,
      "step": 534
    },
    {
      "epoch": 0.14504126640508727,
      "grad_norm": 0.5348760485649109,
      "learning_rate": 4.824165915238954e-05,
      "loss": 1.4917,
      "step": 536
    },
    {
      "epoch": 0.14558246516033013,
      "grad_norm": 0.5444923639297485,
      "learning_rate": 4.842200180342651e-05,
      "loss": 1.4546,
      "step": 538
    },
    {
      "epoch": 0.146123663915573,
      "grad_norm": 0.494761198759079,
      "learning_rate": 4.860234445446348e-05,
      "loss": 1.4751,
      "step": 540
    },
    {
      "epoch": 0.14666486267081585,
      "grad_norm": 0.4921441674232483,
      "learning_rate": 4.8782687105500455e-05,
      "loss": 1.4767,
      "step": 542
    },
    {
      "epoch": 0.14720606142605872,
      "grad_norm": 0.48382577300071716,
      "learning_rate": 4.896302975653742e-05,
      "loss": 1.485,
      "step": 544
    },
    {
      "epoch": 0.14774726018130158,
      "grad_norm": 0.4616708755493164,
      "learning_rate": 4.9143372407574397e-05,
      "loss": 1.4732,
      "step": 546
    },
    {
      "epoch": 0.14828845893654444,
      "grad_norm": 0.5030043125152588,
      "learning_rate": 4.9323715058611364e-05,
      "loss": 1.4799,
      "step": 548
    },
    {
      "epoch": 0.1488296576917873,
      "grad_norm": 0.467230886220932,
      "learning_rate": 4.950405770964833e-05,
      "loss": 1.4594,
      "step": 550
    },
    {
      "epoch": 0.14937085644703016,
      "grad_norm": 0.42864304780960083,
      "learning_rate": 4.9684400360685306e-05,
      "loss": 1.4748,
      "step": 552
    },
    {
      "epoch": 0.14991205520227305,
      "grad_norm": 0.43733683228492737,
      "learning_rate": 4.986474301172227e-05,
      "loss": 1.462,
      "step": 554
    },
    {
      "epoch": 0.1504532539575159,
      "grad_norm": 0.45550286769866943,
      "learning_rate": 5.004508566275925e-05,
      "loss": 1.475,
      "step": 556
    },
    {
      "epoch": 0.15099445271275877,
      "grad_norm": 0.44999995827674866,
      "learning_rate": 5.022542831379622e-05,
      "loss": 1.4794,
      "step": 558
    },
    {
      "epoch": 0.15153565146800163,
      "grad_norm": 0.5035279989242554,
      "learning_rate": 5.040577096483319e-05,
      "loss": 1.471,
      "step": 560
    },
    {
      "epoch": 0.1520768502232445,
      "grad_norm": 0.44605591893196106,
      "learning_rate": 5.058611361587016e-05,
      "loss": 1.4461,
      "step": 562
    },
    {
      "epoch": 0.15261804897848735,
      "grad_norm": 0.5482723712921143,
      "learning_rate": 5.0766456266907124e-05,
      "loss": 1.4597,
      "step": 564
    },
    {
      "epoch": 0.1531592477337302,
      "grad_norm": 0.5323627591133118,
      "learning_rate": 5.094679891794409e-05,
      "loss": 1.4743,
      "step": 566
    },
    {
      "epoch": 0.15370044648897307,
      "grad_norm": 0.5289944410324097,
      "learning_rate": 5.1127141568981066e-05,
      "loss": 1.5,
      "step": 568
    },
    {
      "epoch": 0.15424164524421594,
      "grad_norm": 0.5446243286132812,
      "learning_rate": 5.1307484220018034e-05,
      "loss": 1.4751,
      "step": 570
    },
    {
      "epoch": 0.1547828439994588,
      "grad_norm": 0.525830090045929,
      "learning_rate": 5.1487826871055015e-05,
      "loss": 1.4639,
      "step": 572
    },
    {
      "epoch": 0.15532404275470166,
      "grad_norm": 0.48129191994667053,
      "learning_rate": 5.166816952209198e-05,
      "loss": 1.4652,
      "step": 574
    },
    {
      "epoch": 0.15586524150994452,
      "grad_norm": 0.47915297746658325,
      "learning_rate": 5.184851217312895e-05,
      "loss": 1.4627,
      "step": 576
    },
    {
      "epoch": 0.15640644026518738,
      "grad_norm": 0.5229325294494629,
      "learning_rate": 5.202885482416592e-05,
      "loss": 1.4525,
      "step": 578
    },
    {
      "epoch": 0.15694763902043024,
      "grad_norm": 0.5452600121498108,
      "learning_rate": 5.2209197475202885e-05,
      "loss": 1.458,
      "step": 580
    },
    {
      "epoch": 0.15748883777567313,
      "grad_norm": 0.427432656288147,
      "learning_rate": 5.238954012623985e-05,
      "loss": 1.4773,
      "step": 582
    },
    {
      "epoch": 0.158030036530916,
      "grad_norm": 0.450712114572525,
      "learning_rate": 5.2569882777276827e-05,
      "loss": 1.469,
      "step": 584
    },
    {
      "epoch": 0.15857123528615885,
      "grad_norm": 0.5500516891479492,
      "learning_rate": 5.27502254283138e-05,
      "loss": 1.4603,
      "step": 586
    },
    {
      "epoch": 0.1591124340414017,
      "grad_norm": 0.457157164812088,
      "learning_rate": 5.2930568079350775e-05,
      "loss": 1.4785,
      "step": 588
    },
    {
      "epoch": 0.15965363279664457,
      "grad_norm": 0.49750396609306335,
      "learning_rate": 5.311091073038774e-05,
      "loss": 1.4603,
      "step": 590
    },
    {
      "epoch": 0.16019483155188743,
      "grad_norm": 0.5720525979995728,
      "learning_rate": 5.329125338142471e-05,
      "loss": 1.4753,
      "step": 592
    },
    {
      "epoch": 0.1607360303071303,
      "grad_norm": 0.4425548315048218,
      "learning_rate": 5.347159603246168e-05,
      "loss": 1.462,
      "step": 594
    },
    {
      "epoch": 0.16127722906237316,
      "grad_norm": 0.5064132809638977,
      "learning_rate": 5.3651938683498645e-05,
      "loss": 1.4596,
      "step": 596
    },
    {
      "epoch": 0.16181842781761602,
      "grad_norm": 0.518460750579834,
      "learning_rate": 5.383228133453562e-05,
      "loss": 1.4763,
      "step": 598
    },
    {
      "epoch": 0.16235962657285888,
      "grad_norm": 0.4613576829433441,
      "learning_rate": 5.401262398557259e-05,
      "loss": 1.4487,
      "step": 600
    },
    {
      "epoch": 0.16290082532810174,
      "grad_norm": 0.7046213746070862,
      "learning_rate": 5.419296663660957e-05,
      "loss": 1.472,
      "step": 602
    },
    {
      "epoch": 0.1634420240833446,
      "grad_norm": 0.6164196133613586,
      "learning_rate": 5.4373309287646535e-05,
      "loss": 1.4424,
      "step": 604
    },
    {
      "epoch": 0.16398322283858746,
      "grad_norm": 0.5106020569801331,
      "learning_rate": 5.45536519386835e-05,
      "loss": 1.4567,
      "step": 606
    },
    {
      "epoch": 0.16452442159383032,
      "grad_norm": 0.4291236400604248,
      "learning_rate": 5.473399458972047e-05,
      "loss": 1.4514,
      "step": 608
    },
    {
      "epoch": 0.16506562034907318,
      "grad_norm": 0.46577414870262146,
      "learning_rate": 5.491433724075744e-05,
      "loss": 1.4408,
      "step": 610
    },
    {
      "epoch": 0.16560681910431607,
      "grad_norm": 0.4729917049407959,
      "learning_rate": 5.509467989179441e-05,
      "loss": 1.4493,
      "step": 612
    },
    {
      "epoch": 0.16614801785955893,
      "grad_norm": 0.4651925563812256,
      "learning_rate": 5.527502254283138e-05,
      "loss": 1.465,
      "step": 614
    },
    {
      "epoch": 0.1666892166148018,
      "grad_norm": 0.4756859540939331,
      "learning_rate": 5.545536519386835e-05,
      "loss": 1.4641,
      "step": 616
    },
    {
      "epoch": 0.16723041537004465,
      "grad_norm": 0.42555975914001465,
      "learning_rate": 5.563570784490533e-05,
      "loss": 1.4569,
      "step": 618
    },
    {
      "epoch": 0.16777161412528752,
      "grad_norm": 0.5162522196769714,
      "learning_rate": 5.5816050495942296e-05,
      "loss": 1.4344,
      "step": 620
    },
    {
      "epoch": 0.16831281288053038,
      "grad_norm": 0.5867063999176025,
      "learning_rate": 5.599639314697926e-05,
      "loss": 1.4647,
      "step": 622
    },
    {
      "epoch": 0.16885401163577324,
      "grad_norm": 0.6629165410995483,
      "learning_rate": 5.617673579801623e-05,
      "loss": 1.473,
      "step": 624
    },
    {
      "epoch": 0.1693952103910161,
      "grad_norm": 0.5905330777168274,
      "learning_rate": 5.6357078449053205e-05,
      "loss": 1.4459,
      "step": 626
    },
    {
      "epoch": 0.16993640914625896,
      "grad_norm": 0.7457858324050903,
      "learning_rate": 5.653742110009017e-05,
      "loss": 1.4603,
      "step": 628
    },
    {
      "epoch": 0.17047760790150182,
      "grad_norm": 0.5977684855461121,
      "learning_rate": 5.671776375112714e-05,
      "loss": 1.4621,
      "step": 630
    },
    {
      "epoch": 0.17101880665674468,
      "grad_norm": 0.7097992897033691,
      "learning_rate": 5.689810640216412e-05,
      "loss": 1.4646,
      "step": 632
    },
    {
      "epoch": 0.17156000541198754,
      "grad_norm": 0.5895450711250305,
      "learning_rate": 5.707844905320109e-05,
      "loss": 1.4338,
      "step": 634
    },
    {
      "epoch": 0.1721012041672304,
      "grad_norm": 0.576877772808075,
      "learning_rate": 5.7258791704238056e-05,
      "loss": 1.4666,
      "step": 636
    },
    {
      "epoch": 0.17264240292247326,
      "grad_norm": 0.541110098361969,
      "learning_rate": 5.7439134355275024e-05,
      "loss": 1.4624,
      "step": 638
    },
    {
      "epoch": 0.17318360167771615,
      "grad_norm": 0.5172320604324341,
      "learning_rate": 5.7619477006312e-05,
      "loss": 1.473,
      "step": 640
    },
    {
      "epoch": 0.17372480043295901,
      "grad_norm": 0.47511357069015503,
      "learning_rate": 5.7799819657348965e-05,
      "loss": 1.446,
      "step": 642
    },
    {
      "epoch": 0.17426599918820188,
      "grad_norm": 0.48614808917045593,
      "learning_rate": 5.798016230838593e-05,
      "loss": 1.4394,
      "step": 644
    },
    {
      "epoch": 0.17480719794344474,
      "grad_norm": 0.4435577094554901,
      "learning_rate": 5.81605049594229e-05,
      "loss": 1.43,
      "step": 646
    },
    {
      "epoch": 0.1753483966986876,
      "grad_norm": 0.4458653926849365,
      "learning_rate": 5.834084761045988e-05,
      "loss": 1.46,
      "step": 648
    },
    {
      "epoch": 0.17588959545393046,
      "grad_norm": 0.40675726532936096,
      "learning_rate": 5.852119026149685e-05,
      "loss": 1.4565,
      "step": 650
    },
    {
      "epoch": 0.17643079420917332,
      "grad_norm": 0.4132504165172577,
      "learning_rate": 5.8701532912533817e-05,
      "loss": 1.4522,
      "step": 652
    },
    {
      "epoch": 0.17697199296441618,
      "grad_norm": 0.40881386399269104,
      "learning_rate": 5.888187556357079e-05,
      "loss": 1.4232,
      "step": 654
    },
    {
      "epoch": 0.17751319171965904,
      "grad_norm": 0.40527868270874023,
      "learning_rate": 5.906221821460776e-05,
      "loss": 1.441,
      "step": 656
    },
    {
      "epoch": 0.1780543904749019,
      "grad_norm": 0.40227004885673523,
      "learning_rate": 5.9242560865644726e-05,
      "loss": 1.4259,
      "step": 658
    },
    {
      "epoch": 0.17859558923014476,
      "grad_norm": 0.4043656289577484,
      "learning_rate": 5.942290351668169e-05,
      "loss": 1.4298,
      "step": 660
    },
    {
      "epoch": 0.17913678798538762,
      "grad_norm": 0.4288482666015625,
      "learning_rate": 5.9603246167718674e-05,
      "loss": 1.4439,
      "step": 662
    },
    {
      "epoch": 0.17967798674063049,
      "grad_norm": 0.4385060966014862,
      "learning_rate": 5.978358881875564e-05,
      "loss": 1.4237,
      "step": 664
    },
    {
      "epoch": 0.18021918549587335,
      "grad_norm": 0.396980345249176,
      "learning_rate": 5.996393146979261e-05,
      "loss": 1.4174,
      "step": 666
    },
    {
      "epoch": 0.18076038425111624,
      "grad_norm": 0.4060603678226471,
      "learning_rate": 6.014427412082958e-05,
      "loss": 1.4479,
      "step": 668
    },
    {
      "epoch": 0.1813015830063591,
      "grad_norm": 0.4485025703907013,
      "learning_rate": 6.032461677186655e-05,
      "loss": 1.4493,
      "step": 670
    },
    {
      "epoch": 0.18184278176160196,
      "grad_norm": 0.44034305214881897,
      "learning_rate": 6.050495942290352e-05,
      "loss": 1.4461,
      "step": 672
    },
    {
      "epoch": 0.18238398051684482,
      "grad_norm": 0.418074369430542,
      "learning_rate": 6.0685302073940486e-05,
      "loss": 1.4287,
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.18292517927208768, | |
| "grad_norm": 0.41937318444252014, | |
| "learning_rate": 6.0865644724977454e-05, | |
| "loss": 1.4338, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.18346637802733054, | |
| "grad_norm": 0.4103530943393707, | |
| "learning_rate": 6.104598737601444e-05, | |
| "loss": 1.4391, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.1840075767825734, | |
| "grad_norm": 0.4066039025783539, | |
| "learning_rate": 6.122633002705141e-05, | |
| "loss": 1.4357, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.18454877553781626, | |
| "grad_norm": 0.36903437972068787, | |
| "learning_rate": 6.140667267808838e-05, | |
| "loss": 1.4111, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.18508997429305912, | |
| "grad_norm": 0.37125757336616516, | |
| "learning_rate": 6.158701532912534e-05, | |
| "loss": 1.4233, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.18563117304830198, | |
| "grad_norm": 0.44102513790130615, | |
| "learning_rate": 6.176735798016231e-05, | |
| "loss": 1.4437, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.18617237180354484, | |
| "grad_norm": 0.4337277114391327, | |
| "learning_rate": 6.194770063119928e-05, | |
| "loss": 1.4425, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.1867135705587877, | |
| "grad_norm": 0.37394315004348755, | |
| "learning_rate": 6.212804328223625e-05, | |
| "loss": 1.4452, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.18725476931403057, | |
| "grad_norm": 0.41764944791793823, | |
| "learning_rate": 6.230838593327321e-05, | |
| "loss": 1.4535, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.18779596806927343, | |
| "grad_norm": 0.4214741289615631, | |
| "learning_rate": 6.24887285843102e-05, | |
| "loss": 1.4391, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.18833716682451632, | |
| "grad_norm": 0.4159027338027954, | |
| "learning_rate": 6.266907123534716e-05, | |
| "loss": 1.4197, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.18887836557975918, | |
| "grad_norm": 0.38865673542022705, | |
| "learning_rate": 6.284941388638413e-05, | |
| "loss": 1.4329, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.18941956433500204, | |
| "grad_norm": 0.43646490573883057, | |
| "learning_rate": 6.30297565374211e-05, | |
| "loss": 1.4147, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.1899607630902449, | |
| "grad_norm": 0.41997334361076355, | |
| "learning_rate": 6.321009918845807e-05, | |
| "loss": 1.4275, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.19050196184548776, | |
| "grad_norm": 0.38556602597236633, | |
| "learning_rate": 6.339044183949505e-05, | |
| "loss": 1.4258, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.19104316060073062, | |
| "grad_norm": 0.42955082654953003, | |
| "learning_rate": 6.357078449053201e-05, | |
| "loss": 1.4201, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.19158435935597348, | |
| "grad_norm": 0.3844427764415741, | |
| "learning_rate": 6.3751127141569e-05, | |
| "loss": 1.4448, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.19212555811121634, | |
| "grad_norm": 0.4312956929206848, | |
| "learning_rate": 6.393146979260596e-05, | |
| "loss": 1.4051, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.1926667568664592, | |
| "grad_norm": 0.4556865394115448, | |
| "learning_rate": 6.411181244364293e-05, | |
| "loss": 1.4305, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.19320795562170207, | |
| "grad_norm": 0.37053731083869934, | |
| "learning_rate": 6.42921550946799e-05, | |
| "loss": 1.4301, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.19374915437694493, | |
| "grad_norm": 0.3996010720729828, | |
| "learning_rate": 6.447249774571686e-05, | |
| "loss": 1.4282, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.1942903531321878, | |
| "grad_norm": 0.37610816955566406, | |
| "learning_rate": 6.465284039675383e-05, | |
| "loss": 1.4277, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.19483155188743065, | |
| "grad_norm": 0.3677166998386383, | |
| "learning_rate": 6.48331830477908e-05, | |
| "loss": 1.4029, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.1953727506426735, | |
| "grad_norm": 0.3841564357280731, | |
| "learning_rate": 6.501352569882777e-05, | |
| "loss": 1.4144, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.1959139493979164, | |
| "grad_norm": 0.3687719404697418, | |
| "learning_rate": 6.519386834986475e-05, | |
| "loss": 1.4079, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.19645514815315926, | |
| "grad_norm": 0.38350847363471985, | |
| "learning_rate": 6.537421100090172e-05, | |
| "loss": 1.4269, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.19699634690840212, | |
| "grad_norm": 0.39060813188552856, | |
| "learning_rate": 6.555455365193868e-05, | |
| "loss": 1.4265, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.19753754566364498, | |
| "grad_norm": 0.36068469285964966, | |
| "learning_rate": 6.573489630297565e-05, | |
| "loss": 1.4325, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.19807874441888784, | |
| "grad_norm": 0.41185086965560913, | |
| "learning_rate": 6.591523895401263e-05, | |
| "loss": 1.4348, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.1986199431741307, | |
| "grad_norm": 0.4441224932670593, | |
| "learning_rate": 6.60955816050496e-05, | |
| "loss": 1.4103, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.19916114192937356, | |
| "grad_norm": 0.3727317452430725, | |
| "learning_rate": 6.627592425608657e-05, | |
| "loss": 1.4188, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.19970234068461643, | |
| "grad_norm": 0.394972562789917, | |
| "learning_rate": 6.645626690712355e-05, | |
| "loss": 1.4095, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.20024353943985929, | |
| "grad_norm": 0.40716880559921265, | |
| "learning_rate": 6.663660955816052e-05, | |
| "loss": 1.4127, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.20078473819510215, | |
| "grad_norm": 0.4156644344329834, | |
| "learning_rate": 6.681695220919748e-05, | |
| "loss": 1.4189, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.201325936950345, | |
| "grad_norm": 0.3787958323955536, | |
| "learning_rate": 6.699729486023445e-05, | |
| "loss": 1.4221, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.20186713570558787, | |
| "grad_norm": 0.42427608370780945, | |
| "learning_rate": 6.717763751127142e-05, | |
| "loss": 1.4192, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.20240833446083073, | |
| "grad_norm": 0.4778277277946472, | |
| "learning_rate": 6.735798016230839e-05, | |
| "loss": 1.4024, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.2029495332160736, | |
| "grad_norm": 0.44801151752471924, | |
| "learning_rate": 6.753832281334535e-05, | |
| "loss": 1.4222, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.20349073197131648, | |
| "grad_norm": 0.46737611293792725, | |
| "learning_rate": 6.771866546438232e-05, | |
| "loss": 1.4117, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.20403193072655934, | |
| "grad_norm": 0.4184872806072235, | |
| "learning_rate": 6.78990081154193e-05, | |
| "loss": 1.4066, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.2045731294818022, | |
| "grad_norm": 0.40458211302757263, | |
| "learning_rate": 6.807935076645627e-05, | |
| "loss": 1.4274, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.20511432823704506, | |
| "grad_norm": 0.43926185369491577, | |
| "learning_rate": 6.825969341749324e-05, | |
| "loss": 1.4231, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.20565552699228792, | |
| "grad_norm": 0.4434867203235626, | |
| "learning_rate": 6.844003606853022e-05, | |
| "loss": 1.4121, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.20619672574753078, | |
| "grad_norm": 0.4500143826007843, | |
| "learning_rate": 6.862037871956719e-05, | |
| "loss": 1.4179, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.20673792450277365, | |
| "grad_norm": 0.45456650853157043, | |
| "learning_rate": 6.880072137060415e-05, | |
| "loss": 1.3912, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.2072791232580165, | |
| "grad_norm": 0.4214187264442444, | |
| "learning_rate": 6.898106402164112e-05, | |
| "loss": 1.3962, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.20782032201325937, | |
| "grad_norm": 0.427682101726532, | |
| "learning_rate": 6.916140667267809e-05, | |
| "loss": 1.4316, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.20836152076850223, | |
| "grad_norm": 0.44491469860076904, | |
| "learning_rate": 6.934174932371507e-05, | |
| "loss": 1.4218, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.2089027195237451, | |
| "grad_norm": 0.42736080288887024, | |
| "learning_rate": 6.952209197475204e-05, | |
| "loss": 1.3931, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.20944391827898795, | |
| "grad_norm": 0.4041571021080017, | |
| "learning_rate": 6.9702434625789e-05, | |
| "loss": 1.4201, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.2099851170342308, | |
| "grad_norm": 0.4250961244106293, | |
| "learning_rate": 6.988277727682597e-05, | |
| "loss": 1.4299, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.21052631578947367, | |
| "grad_norm": 0.4335261881351471, | |
| "learning_rate": 7.006311992786294e-05, | |
| "loss": 1.4125, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.21106751454471653, | |
| "grad_norm": 0.42000851035118103, | |
| "learning_rate": 7.02434625788999e-05, | |
| "loss": 1.3969, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.21160871329995942, | |
| "grad_norm": 0.38111838698387146, | |
| "learning_rate": 7.042380522993687e-05, | |
| "loss": 1.3795, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.21214991205520228, | |
| "grad_norm": 0.38366812467575073, | |
| "learning_rate": 7.060414788097385e-05, | |
| "loss": 1.4041, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.21269111081044514, | |
| "grad_norm": 0.4334602355957031, | |
| "learning_rate": 7.078449053201082e-05, | |
| "loss": 1.415, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.213232309565688, | |
| "grad_norm": 0.40296411514282227, | |
| "learning_rate": 7.096483318304779e-05, | |
| "loss": 1.4052, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.21377350832093087, | |
| "grad_norm": 0.4197232723236084, | |
| "learning_rate": 7.114517583408477e-05, | |
| "loss": 1.4205, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.21431470707617373, | |
| "grad_norm": 0.40287715196609497, | |
| "learning_rate": 7.132551848512174e-05, | |
| "loss": 1.4047, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.2148559058314166, | |
| "grad_norm": 0.37324196100234985, | |
| "learning_rate": 7.15058611361587e-05, | |
| "loss": 1.4398, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.21539710458665945, | |
| "grad_norm": 0.4409985840320587, | |
| "learning_rate": 7.168620378719567e-05, | |
| "loss": 1.3873, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.2159383033419023, | |
| "grad_norm": 0.41441893577575684, | |
| "learning_rate": 7.186654643823264e-05, | |
| "loss": 1.4174, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.21647950209714517, | |
| "grad_norm": 0.4271719455718994, | |
| "learning_rate": 7.204688908926962e-05, | |
| "loss": 1.3987, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.21702070085238803, | |
| "grad_norm": 0.4969992935657501, | |
| "learning_rate": 7.222723174030659e-05, | |
| "loss": 1.4049, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.2175618996076309, | |
| "grad_norm": 0.45711180567741394, | |
| "learning_rate": 7.240757439134356e-05, | |
| "loss": 1.4061, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.21810309836287375, | |
| "grad_norm": 0.4479979872703552, | |
| "learning_rate": 7.258791704238052e-05, | |
| "loss": 1.4049, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.21864429711811662, | |
| "grad_norm": 0.4708006978034973, | |
| "learning_rate": 7.276825969341749e-05, | |
| "loss": 1.3971, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.2191854958733595, | |
| "grad_norm": 0.4387456774711609, | |
| "learning_rate": 7.294860234445446e-05, | |
| "loss": 1.4272, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.21972669462860236, | |
| "grad_norm": 0.5285756587982178, | |
| "learning_rate": 7.312894499549143e-05, | |
| "loss": 1.3902, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.22026789338384523, | |
| "grad_norm": 0.5111876726150513, | |
| "learning_rate": 7.330928764652841e-05, | |
| "loss": 1.4176, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.2208090921390881, | |
| "grad_norm": 0.4643821716308594, | |
| "learning_rate": 7.348963029756538e-05, | |
| "loss": 1.4216, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.22135029089433095, | |
| "grad_norm": 0.5162214040756226, | |
| "learning_rate": 7.366997294860236e-05, | |
| "loss": 1.4025, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.2218914896495738, | |
| "grad_norm": 0.4296860992908478, | |
| "learning_rate": 7.385031559963932e-05, | |
| "loss": 1.3919, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.22243268840481667, | |
| "grad_norm": 0.4449775815010071, | |
| "learning_rate": 7.403065825067629e-05, | |
| "loss": 1.4002, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.22297388716005953, | |
| "grad_norm": 0.39713212847709656, | |
| "learning_rate": 7.421100090171326e-05, | |
| "loss": 1.4012, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.2235150859153024, | |
| "grad_norm": 0.41655346751213074, | |
| "learning_rate": 7.439134355275023e-05, | |
| "loss": 1.4155, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.22405628467054525, | |
| "grad_norm": 0.3751365542411804, | |
| "learning_rate": 7.45716862037872e-05, | |
| "loss": 1.4021, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.2245974834257881, | |
| "grad_norm": 0.41483408212661743, | |
| "learning_rate": 7.475202885482417e-05, | |
| "loss": 1.4207, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.22513868218103097, | |
| "grad_norm": 0.397360235452652, | |
| "learning_rate": 7.493237150586114e-05, | |
| "loss": 1.392, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.22567988093627384, | |
| "grad_norm": 0.3874877691268921, | |
| "learning_rate": 7.511271415689811e-05, | |
| "loss": 1.4143, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.2262210796915167, | |
| "grad_norm": 0.4382254481315613, | |
| "learning_rate": 7.529305680793508e-05, | |
| "loss": 1.4109, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.22676227844675959, | |
| "grad_norm": 0.3728530704975128, | |
| "learning_rate": 7.547339945897204e-05, | |
| "loss": 1.4215, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.22730347720200245, | |
| "grad_norm": 0.41155338287353516, | |
| "learning_rate": 7.565374211000901e-05, | |
| "loss": 1.3963, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.2278446759572453, | |
| "grad_norm": 0.3550320267677307, | |
| "learning_rate": 7.5834084761046e-05, | |
| "loss": 1.3998, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.22838587471248817, | |
| "grad_norm": 0.3858035206794739, | |
| "learning_rate": 7.601442741208296e-05, | |
| "loss": 1.387, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.22892707346773103, | |
| "grad_norm": 0.38636457920074463, | |
| "learning_rate": 7.619477006311994e-05, | |
| "loss": 1.387, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.2294682722229739, | |
| "grad_norm": 0.41915518045425415, | |
| "learning_rate": 7.637511271415691e-05, | |
| "loss": 1.3917, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.23000947097821675, | |
| "grad_norm": 0.35796865820884705, | |
| "learning_rate": 7.655545536519388e-05, | |
| "loss": 1.406, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.2305506697334596, | |
| "grad_norm": 0.35221853852272034, | |
| "learning_rate": 7.673579801623084e-05, | |
| "loss": 1.3892, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.23109186848870247, | |
| "grad_norm": 0.3815077245235443, | |
| "learning_rate": 7.691614066726781e-05, | |
| "loss": 1.3845, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.23163306724394533, | |
| "grad_norm": 0.3554491400718689, | |
| "learning_rate": 7.709648331830478e-05, | |
| "loss": 1.3644, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.2321742659991882, | |
| "grad_norm": 0.3762814998626709, | |
| "learning_rate": 7.727682596934175e-05, | |
| "loss": 1.3976, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.23271546475443106, | |
| "grad_norm": 0.34575173258781433, | |
| "learning_rate": 7.745716862037873e-05, | |
| "loss": 1.3925, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.23325666350967392, | |
| "grad_norm": 0.37864556908607483, | |
| "learning_rate": 7.76375112714157e-05, | |
| "loss": 1.3993, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.23379786226491678, | |
| "grad_norm": 0.34448474645614624, | |
| "learning_rate": 7.781785392245266e-05, | |
| "loss": 1.3855, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.23433906102015967, | |
| "grad_norm": 0.40932390093803406, | |
| "learning_rate": 7.799819657348963e-05, | |
| "loss": 1.395, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.23488025977540253, | |
| "grad_norm": 0.3737650513648987, | |
| "learning_rate": 7.81785392245266e-05, | |
| "loss": 1.3918, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.2354214585306454, | |
| "grad_norm": 0.42988118529319763, | |
| "learning_rate": 7.835888187556357e-05, | |
| "loss": 1.3837, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.23596265728588825, | |
| "grad_norm": 0.3865496814250946, | |
| "learning_rate": 7.853922452660055e-05, | |
| "loss": 1.3976, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.2365038560411311, | |
| "grad_norm": 0.3682670295238495, | |
| "learning_rate": 7.871956717763751e-05, | |
| "loss": 1.3792, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.23704505479637397, | |
| "grad_norm": 0.4236462712287903, | |
| "learning_rate": 7.88999098286745e-05, | |
| "loss": 1.4032, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.23758625355161683, | |
| "grad_norm": 0.3742213249206543, | |
| "learning_rate": 7.908025247971146e-05, | |
| "loss": 1.3709, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.2381274523068597, | |
| "grad_norm": 0.38234424591064453, | |
| "learning_rate": 7.926059513074843e-05, | |
| "loss": 1.3862, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.23866865106210255, | |
| "grad_norm": 0.37414151430130005, | |
| "learning_rate": 7.94409377817854e-05, | |
| "loss": 1.3751, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.23920984981734542, | |
| "grad_norm": 0.3838132619857788, | |
| "learning_rate": 7.962128043282237e-05, | |
| "loss": 1.3805, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.23975104857258828, | |
| "grad_norm": 0.3818622827529907, | |
| "learning_rate": 7.980162308385933e-05, | |
| "loss": 1.3735, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.24029224732783114, | |
| "grad_norm": 0.38791927695274353, | |
| "learning_rate": 7.99819657348963e-05, | |
| "loss": 1.3958, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.240833446083074, | |
| "grad_norm": 0.4164978861808777, | |
| "learning_rate": 8.016230838593328e-05, | |
| "loss": 1.421, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.24137464483831686, | |
| "grad_norm": 0.3721414804458618, | |
| "learning_rate": 8.034265103697025e-05, | |
| "loss": 1.3977, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.24191584359355975, | |
| "grad_norm": 0.37698984146118164, | |
| "learning_rate": 8.052299368800722e-05, | |
| "loss": 1.3854, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.2424570423488026, | |
| "grad_norm": 0.3553116023540497, | |
| "learning_rate": 8.070333633904418e-05, | |
| "loss": 1.3925, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.24299824110404547, | |
| "grad_norm": 0.37809059023857117, | |
| "learning_rate": 8.088367899008115e-05, | |
| "loss": 1.368, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.24353943985928833, | |
| "grad_norm": 0.3835943043231964, | |
| "learning_rate": 8.106402164111813e-05, | |
| "loss": 1.3992, | |
| "step": 900 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 11088, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 300, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.741283047325041e+19, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |