{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.7306183195778649,
  "eval_steps": 500,
  "global_step": 2700,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00027059937762143147,
      "grad_norm": 4.086390018463135,
      "learning_rate": 0.0,
      "loss": 3.2754,
      "step": 1
    },
    {
      "epoch": 0.0005411987552428629,
      "grad_norm": 3.758815288543701,
      "learning_rate": 9.017132551848513e-08,
      "loss": 3.2863,
      "step": 2
    },
    {
      "epoch": 0.0010823975104857259,
      "grad_norm": 3.8250608444213867,
      "learning_rate": 2.705139765554554e-07,
      "loss": 3.3425,
      "step": 4
    },
    {
      "epoch": 0.0016235962657285888,
      "grad_norm": 3.8092095851898193,
      "learning_rate": 4.5085662759242564e-07,
      "loss": 3.3165,
      "step": 6
    },
    {
      "epoch": 0.0021647950209714517,
      "grad_norm": 3.7621052265167236,
      "learning_rate": 6.311992786293959e-07,
      "loss": 3.3295,
      "step": 8
    },
    {
      "epoch": 0.002705993776214315,
      "grad_norm": 3.4136276245117188,
      "learning_rate": 8.115419296663661e-07,
      "loss": 3.3073,
      "step": 10
    },
    {
      "epoch": 0.0032471925314571776,
      "grad_norm": 2.855100393295288,
      "learning_rate": 9.918845807033363e-07,
      "loss": 3.3031,
      "step": 12
    },
    {
      "epoch": 0.0037883912867000408,
      "grad_norm": 2.491767406463623,
      "learning_rate": 1.1722272317403068e-06,
      "loss": 3.2943,
      "step": 14
    },
    {
      "epoch": 0.0043295900419429035,
      "grad_norm": 2.359778642654419,
      "learning_rate": 1.3525698827772768e-06,
      "loss": 3.2622,
      "step": 16
    },
    {
      "epoch": 0.004870788797185766,
      "grad_norm": 2.037504196166992,
      "learning_rate": 1.5329125338142473e-06,
      "loss": 3.239,
      "step": 18
    },
    {
      "epoch": 0.00541198755242863,
      "grad_norm": 2.8542497158050537,
      "learning_rate": 1.7132551848512173e-06,
      "loss": 3.2031,
      "step": 20
    },
    {
      "epoch": 0.0059531863076714925,
      "grad_norm": 2.297046661376953,
      "learning_rate": 1.8935978358881876e-06,
      "loss": 3.1721,
      "step": 22
    },
    {
      "epoch": 0.006494385062914355,
      "grad_norm": 2.2149112224578857,
      "learning_rate": 2.0739404869251576e-06,
      "loss": 3.121,
      "step": 24
    },
    {
      "epoch": 0.007035583818157218,
      "grad_norm": 1.8048591613769531,
      "learning_rate": 2.254283137962128e-06,
      "loss": 3.0857,
      "step": 26
    },
    {
      "epoch": 0.0075767825734000815,
      "grad_norm": 1.7466434240341187,
      "learning_rate": 2.4346257889990986e-06,
      "loss": 3.0489,
      "step": 28
    },
    {
      "epoch": 0.008117981328642944,
      "grad_norm": 2.1722524166107178,
      "learning_rate": 2.6149684400360686e-06,
      "loss": 3.0016,
      "step": 30
    },
    {
      "epoch": 0.008659180083885807,
      "grad_norm": 1.364578366279602,
      "learning_rate": 2.7953110910730386e-06,
      "loss": 2.9587,
      "step": 32
    },
    {
      "epoch": 0.00920037883912867,
      "grad_norm": 1.5823427438735962,
      "learning_rate": 2.9756537421100095e-06,
      "loss": 2.931,
      "step": 34
    },
    {
      "epoch": 0.009741577594371532,
      "grad_norm": 1.2367908954620361,
      "learning_rate": 3.1559963931469796e-06,
      "loss": 2.8953,
      "step": 36
    },
    {
      "epoch": 0.010282776349614395,
      "grad_norm": 1.0437366962432861,
      "learning_rate": 3.3363390441839496e-06,
      "loss": 2.8412,
      "step": 38
    },
    {
      "epoch": 0.01082397510485726,
      "grad_norm": 1.081803798675537,
      "learning_rate": 3.5166816952209197e-06,
      "loss": 2.7832,
      "step": 40
    },
    {
      "epoch": 0.011365173860100122,
      "grad_norm": 0.9715840220451355,
      "learning_rate": 3.69702434625789e-06,
      "loss": 2.7729,
      "step": 42
    },
    {
      "epoch": 0.011906372615342985,
      "grad_norm": 0.8603936433792114,
      "learning_rate": 3.877366997294861e-06,
      "loss": 2.6904,
      "step": 44
    },
    {
      "epoch": 0.012447571370585848,
      "grad_norm": 0.8236231803894043,
      "learning_rate": 4.057709648331831e-06,
      "loss": 2.6908,
      "step": 46
    },
    {
      "epoch": 0.01298877012582871,
      "grad_norm": 0.7681186199188232,
      "learning_rate": 4.2380522993688015e-06,
      "loss": 2.6212,
      "step": 48
    },
    {
      "epoch": 0.013529968881071573,
      "grad_norm": 0.8002827167510986,
      "learning_rate": 4.4183949504057716e-06,
      "loss": 2.6035,
      "step": 50
    },
    {
      "epoch": 0.014071167636314436,
      "grad_norm": 0.6757120490074158,
      "learning_rate": 4.598737601442742e-06,
      "loss": 2.595,
      "step": 52
    },
    {
      "epoch": 0.014612366391557299,
      "grad_norm": 0.6619369387626648,
      "learning_rate": 4.779080252479712e-06,
      "loss": 2.5522,
      "step": 54
    },
    {
      "epoch": 0.015153565146800163,
      "grad_norm": 0.6247105598449707,
      "learning_rate": 4.959422903516682e-06,
      "loss": 2.5079,
      "step": 56
    },
    {
      "epoch": 0.015694763902043024,
      "grad_norm": 0.6559263467788696,
      "learning_rate": 5.139765554553652e-06,
      "loss": 2.5009,
      "step": 58
    },
    {
      "epoch": 0.01623596265728589,
      "grad_norm": 0.6590877175331116,
      "learning_rate": 5.320108205590623e-06,
      "loss": 2.4648,
      "step": 60
    },
    {
      "epoch": 0.01677716141252875,
      "grad_norm": 0.6045516133308411,
      "learning_rate": 5.500450856627593e-06,
      "loss": 2.421,
      "step": 62
    },
    {
      "epoch": 0.017318360167771614,
      "grad_norm": 0.6533932089805603,
      "learning_rate": 5.680793507664563e-06,
      "loss": 2.3966,
      "step": 64
    },
    {
      "epoch": 0.01785955892301448,
      "grad_norm": 0.6478094458580017,
      "learning_rate": 5.861136158701533e-06,
      "loss": 2.3903,
      "step": 66
    },
    {
      "epoch": 0.01840075767825734,
      "grad_norm": 0.7349300980567932,
      "learning_rate": 6.041478809738504e-06,
      "loss": 2.3552,
      "step": 68
    },
    {
      "epoch": 0.018941956433500204,
      "grad_norm": 0.6454821825027466,
      "learning_rate": 6.221821460775474e-06,
      "loss": 2.3262,
      "step": 70
    },
    {
      "epoch": 0.019483155188743065,
      "grad_norm": 0.7321672439575195,
      "learning_rate": 6.402164111812444e-06,
      "loss": 2.3197,
      "step": 72
    },
    {
      "epoch": 0.02002435394398593,
      "grad_norm": 0.7664237022399902,
      "learning_rate": 6.582506762849414e-06,
      "loss": 2.2992,
      "step": 74
    },
    {
      "epoch": 0.02056555269922879,
      "grad_norm": 0.6843811869621277,
      "learning_rate": 6.762849413886384e-06,
      "loss": 2.2927,
      "step": 76
    },
    {
      "epoch": 0.021106751454471655,
      "grad_norm": 0.7199612259864807,
      "learning_rate": 6.9431920649233556e-06,
      "loss": 2.2525,
      "step": 78
    },
    {
      "epoch": 0.02164795020971452,
      "grad_norm": 0.778446614742279,
      "learning_rate": 7.123534715960326e-06,
      "loss": 2.2267,
      "step": 80
    },
    {
      "epoch": 0.02218914896495738,
      "grad_norm": 0.9287930727005005,
      "learning_rate": 7.303877366997296e-06,
      "loss": 2.2206,
      "step": 82
    },
    {
      "epoch": 0.022730347720200245,
      "grad_norm": 1.033782958984375,
      "learning_rate": 7.484220018034266e-06,
      "loss": 2.2063,
      "step": 84
    },
    {
      "epoch": 0.023271546475443106,
      "grad_norm": 1.0132615566253662,
      "learning_rate": 7.664562669071236e-06,
      "loss": 2.1677,
      "step": 86
    },
    {
      "epoch": 0.02381274523068597,
      "grad_norm": 0.9043529033660889,
      "learning_rate": 7.844905320108207e-06,
      "loss": 2.1696,
      "step": 88
    },
    {
      "epoch": 0.02435394398592883,
      "grad_norm": 0.6718290448188782,
      "learning_rate": 8.025247971145176e-06,
      "loss": 2.1492,
      "step": 90
    },
    {
      "epoch": 0.024895142741171696,
      "grad_norm": 0.9615944027900696,
      "learning_rate": 8.205590622182147e-06,
      "loss": 2.1452,
      "step": 92
    },
    {
      "epoch": 0.02543634149641456,
      "grad_norm": 0.9435996413230896,
      "learning_rate": 8.385933273219116e-06,
      "loss": 2.1098,
      "step": 94
    },
    {
      "epoch": 0.02597754025165742,
      "grad_norm": 0.7614261507987976,
      "learning_rate": 8.566275924256087e-06,
      "loss": 2.1286,
      "step": 96
    },
    {
      "epoch": 0.026518739006900285,
      "grad_norm": 0.9416339993476868,
      "learning_rate": 8.746618575293058e-06,
      "loss": 2.1092,
      "step": 98
    },
    {
      "epoch": 0.027059937762143146,
      "grad_norm": 0.9229443073272705,
      "learning_rate": 8.926961226330027e-06,
      "loss": 2.0932,
      "step": 100
    },
    {
      "epoch": 0.02760113651738601,
      "grad_norm": 0.7135593295097351,
      "learning_rate": 9.107303877366998e-06,
      "loss": 2.0699,
      "step": 102
    },
    {
      "epoch": 0.028142335272628872,
      "grad_norm": 1.0263723134994507,
      "learning_rate": 9.287646528403967e-06,
      "loss": 2.0445,
      "step": 104
    },
    {
      "epoch": 0.028683534027871736,
      "grad_norm": 1.0300300121307373,
      "learning_rate": 9.467989179440938e-06,
      "loss": 2.0463,
      "step": 106
    },
    {
      "epoch": 0.029224732783114597,
      "grad_norm": 0.8331286311149597,
      "learning_rate": 9.648331830477909e-06,
      "loss": 2.0381,
      "step": 108
    },
    {
      "epoch": 0.02976593153835746,
      "grad_norm": 0.7501435875892639,
      "learning_rate": 9.828674481514878e-06,
      "loss": 2.0411,
      "step": 110
    },
    {
      "epoch": 0.030307130293600326,
      "grad_norm": 0.6895191073417664,
      "learning_rate": 1.0009017132551849e-05,
      "loss": 2.0475,
      "step": 112
    },
    {
      "epoch": 0.030848329048843187,
      "grad_norm": 0.95854252576828,
      "learning_rate": 1.018935978358882e-05,
      "loss": 2.0071,
      "step": 114
    },
    {
      "epoch": 0.03138952780408605,
      "grad_norm": 1.1303929090499878,
      "learning_rate": 1.036970243462579e-05,
      "loss": 2.0008,
      "step": 116
    },
    {
      "epoch": 0.031930726559328916,
      "grad_norm": 0.7708876729011536,
      "learning_rate": 1.055004508566276e-05,
      "loss": 2.0061,
      "step": 118
    },
    {
      "epoch": 0.03247192531457178,
      "grad_norm": 0.9773860573768616,
      "learning_rate": 1.073038773669973e-05,
      "loss": 2.0096,
      "step": 120
    },
    {
      "epoch": 0.03301312406981464,
      "grad_norm": 1.118385910987854,
      "learning_rate": 1.09107303877367e-05,
      "loss": 1.9939,
      "step": 122
    },
    {
      "epoch": 0.0335543228250575,
      "grad_norm": 0.7215014696121216,
      "learning_rate": 1.109107303877367e-05,
      "loss": 1.9515,
      "step": 124
    },
    {
      "epoch": 0.03409552158030037,
      "grad_norm": 0.9696834683418274,
      "learning_rate": 1.1271415689810642e-05,
      "loss": 1.9639,
      "step": 126
    },
    {
      "epoch": 0.03463672033554323,
      "grad_norm": 0.945482611656189,
      "learning_rate": 1.1451758340847611e-05,
      "loss": 1.9397,
      "step": 128
    },
    {
      "epoch": 0.03517791909078609,
      "grad_norm": 0.7454535365104675,
      "learning_rate": 1.1632100991884582e-05,
      "loss": 1.9353,
      "step": 130
    },
    {
      "epoch": 0.03571911784602896,
      "grad_norm": 0.7824187278747559,
      "learning_rate": 1.1812443642921551e-05,
      "loss": 1.9227,
      "step": 132
    },
    {
      "epoch": 0.03626031660127182,
      "grad_norm": 0.7939879894256592,
      "learning_rate": 1.1992786293958522e-05,
      "loss": 1.9126,
      "step": 134
    },
    {
      "epoch": 0.03680151535651468,
      "grad_norm": 0.7776147723197937,
      "learning_rate": 1.2173128944995491e-05,
      "loss": 1.9002,
      "step": 136
    },
    {
      "epoch": 0.03734271411175754,
      "grad_norm": 0.6580236554145813,
      "learning_rate": 1.2353471596032462e-05,
      "loss": 1.9121,
      "step": 138
    },
    {
      "epoch": 0.03788391286700041,
      "grad_norm": 0.7200301289558411,
      "learning_rate": 1.2533814247069433e-05,
      "loss": 1.8885,
      "step": 140
    },
    {
      "epoch": 0.03842511162224327,
      "grad_norm": 0.7958497405052185,
      "learning_rate": 1.2714156898106402e-05,
      "loss": 1.9095,
      "step": 142
    },
    {
      "epoch": 0.03896631037748613,
      "grad_norm": 0.9120681881904602,
      "learning_rate": 1.2894499549143375e-05,
      "loss": 1.884,
      "step": 144
    },
    {
      "epoch": 0.039507509132729,
      "grad_norm": 0.8108247518539429,
      "learning_rate": 1.3074842200180342e-05,
      "loss": 1.8656,
      "step": 146
    },
    {
      "epoch": 0.04004870788797186,
      "grad_norm": 0.7010449171066284,
      "learning_rate": 1.3255184851217315e-05,
      "loss": 1.8635,
      "step": 148
    },
    {
      "epoch": 0.04058990664321472,
      "grad_norm": 0.8178524374961853,
      "learning_rate": 1.3435527502254284e-05,
      "loss": 1.8933,
      "step": 150
    },
    {
      "epoch": 0.04113110539845758,
      "grad_norm": 1.0447405576705933,
      "learning_rate": 1.3615870153291255e-05,
      "loss": 1.8523,
      "step": 152
    },
    {
      "epoch": 0.04167230415370045,
      "grad_norm": 0.8516271710395813,
      "learning_rate": 1.3796212804328224e-05,
      "loss": 1.8528,
      "step": 154
    },
    {
      "epoch": 0.04221350290894331,
      "grad_norm": 0.8437328934669495,
      "learning_rate": 1.3976555455365195e-05,
      "loss": 1.861,
      "step": 156
    },
    {
      "epoch": 0.04275470166418617,
      "grad_norm": 0.851265549659729,
      "learning_rate": 1.4156898106402164e-05,
      "loss": 1.8315,
      "step": 158
    },
    {
      "epoch": 0.04329590041942904,
      "grad_norm": 0.7337156534194946,
      "learning_rate": 1.4337240757439135e-05,
      "loss": 1.8354,
      "step": 160
    },
    {
      "epoch": 0.0438370991746719,
      "grad_norm": 0.9754143357276917,
      "learning_rate": 1.4517583408476104e-05,
      "loss": 1.8252,
      "step": 162
    },
    {
      "epoch": 0.04437829792991476,
      "grad_norm": 0.6172115802764893,
      "learning_rate": 1.4697926059513075e-05,
      "loss": 1.8094,
      "step": 164
    },
    {
      "epoch": 0.04491949668515762,
      "grad_norm": 0.8304158449172974,
      "learning_rate": 1.4878268710550044e-05,
      "loss": 1.8078,
      "step": 166
    },
    {
      "epoch": 0.04546069544040049,
      "grad_norm": 0.6388853788375854,
      "learning_rate": 1.5058611361587017e-05,
      "loss": 1.8106,
      "step": 168
    },
    {
      "epoch": 0.04600189419564335,
      "grad_norm": 0.743231475353241,
      "learning_rate": 1.5238954012623984e-05,
      "loss": 1.8144,
      "step": 170
    },
    {
      "epoch": 0.04654309295088621,
      "grad_norm": 0.6442289352416992,
      "learning_rate": 1.5419296663660955e-05,
      "loss": 1.7831,
      "step": 172
    },
    {
      "epoch": 0.04708429170612908,
      "grad_norm": 0.6877187490463257,
      "learning_rate": 1.559963931469793e-05,
      "loss": 1.8043,
      "step": 174
    },
    {
      "epoch": 0.04762549046137194,
      "grad_norm": 0.9389640688896179,
      "learning_rate": 1.5779981965734897e-05,
      "loss": 1.7869,
      "step": 176
    },
    {
      "epoch": 0.0481666892166148,
      "grad_norm": 1.0456589460372925,
      "learning_rate": 1.5960324616771868e-05,
      "loss": 1.7681,
      "step": 178
    },
    {
      "epoch": 0.04870788797185766,
      "grad_norm": 0.9617791175842285,
      "learning_rate": 1.614066726780884e-05,
      "loss": 1.7668,
      "step": 180
    },
    {
      "epoch": 0.04924908672710053,
      "grad_norm": 0.9334360361099243,
      "learning_rate": 1.632100991884581e-05,
      "loss": 1.7893,
      "step": 182
    },
    {
      "epoch": 0.04979028548234339,
      "grad_norm": 0.8952531814575195,
      "learning_rate": 1.6501352569882777e-05,
      "loss": 1.7758,
      "step": 184
    },
    {
      "epoch": 0.05033148423758625,
      "grad_norm": 0.8544924855232239,
      "learning_rate": 1.6681695220919748e-05,
      "loss": 1.793,
      "step": 186
    },
    {
      "epoch": 0.05087268299282912,
      "grad_norm": 0.7782765030860901,
      "learning_rate": 1.686203787195672e-05,
      "loss": 1.768,
      "step": 188
    },
    {
      "epoch": 0.05141388174807198,
      "grad_norm": 0.7119695544242859,
      "learning_rate": 1.704238052299369e-05,
      "loss": 1.7685,
      "step": 190
    },
    {
      "epoch": 0.05195508050331484,
      "grad_norm": 0.9119647145271301,
      "learning_rate": 1.7222723174030657e-05,
      "loss": 1.7706,
      "step": 192
    },
    {
      "epoch": 0.0524962792585577,
      "grad_norm": 0.6414957642555237,
      "learning_rate": 1.7403065825067628e-05,
      "loss": 1.7626,
      "step": 194
    },
    {
      "epoch": 0.05303747801380057,
      "grad_norm": 0.8069677352905273,
      "learning_rate": 1.75834084761046e-05,
      "loss": 1.7423,
      "step": 196
    },
    {
      "epoch": 0.05357867676904343,
      "grad_norm": 0.6549937725067139,
      "learning_rate": 1.776375112714157e-05,
      "loss": 1.7428,
      "step": 198
    },
    {
      "epoch": 0.05411987552428629,
      "grad_norm": 0.8064024448394775,
      "learning_rate": 1.7944093778178538e-05,
      "loss": 1.7448,
      "step": 200
    },
    {
      "epoch": 0.054661074279529154,
      "grad_norm": 0.7182701826095581,
      "learning_rate": 1.8124436429215512e-05,
      "loss": 1.7248,
      "step": 202
    },
    {
      "epoch": 0.05520227303477202,
      "grad_norm": 0.6997919678688049,
      "learning_rate": 1.830477908025248e-05,
      "loss": 1.7281,
      "step": 204
    },
    {
      "epoch": 0.05574347179001488,
      "grad_norm": 0.7071277499198914,
      "learning_rate": 1.848512173128945e-05,
      "loss": 1.714,
      "step": 206
    },
    {
      "epoch": 0.056284670545257744,
      "grad_norm": 0.6344273090362549,
      "learning_rate": 1.866546438232642e-05,
      "loss": 1.7463,
      "step": 208
    },
    {
      "epoch": 0.05682586930050061,
      "grad_norm": 0.7192733883857727,
      "learning_rate": 1.8845807033363392e-05,
      "loss": 1.737,
      "step": 210
    },
    {
      "epoch": 0.05736706805574347,
      "grad_norm": 0.7418521642684937,
      "learning_rate": 1.9026149684400363e-05,
      "loss": 1.7197,
      "step": 212
    },
    {
      "epoch": 0.057908266810986334,
      "grad_norm": 0.875845730304718,
      "learning_rate": 1.920649233543733e-05,
      "loss": 1.6968,
      "step": 214
    },
    {
      "epoch": 0.058449465566229195,
      "grad_norm": 0.7394037842750549,
      "learning_rate": 1.9386834986474305e-05,
      "loss": 1.7051,
      "step": 216
    },
    {
      "epoch": 0.05899066432147206,
      "grad_norm": 0.6689572930335999,
      "learning_rate": 1.9567177637511272e-05,
      "loss": 1.7152,
      "step": 218
    },
    {
      "epoch": 0.05953186307671492,
      "grad_norm": 0.7955539226531982,
      "learning_rate": 1.9747520288548243e-05,
      "loss": 1.7136,
      "step": 220
    },
    {
      "epoch": 0.060073061831957784,
      "grad_norm": 0.7005388140678406,
      "learning_rate": 1.9927862939585214e-05,
      "loss": 1.7152,
      "step": 222
    },
    {
      "epoch": 0.06061426058720065,
      "grad_norm": 0.6205731630325317,
      "learning_rate": 2.0108205590622185e-05,
      "loss": 1.6901,
      "step": 224
    },
    {
      "epoch": 0.06115545934244351,
      "grad_norm": 0.7079929709434509,
      "learning_rate": 2.0288548241659152e-05,
      "loss": 1.6905,
      "step": 226
    },
    {
      "epoch": 0.061696658097686374,
      "grad_norm": 0.6871302723884583,
      "learning_rate": 2.0468890892696123e-05,
      "loss": 1.6867,
      "step": 228
    },
    {
      "epoch": 0.062237856852929235,
      "grad_norm": 0.7172162532806396,
      "learning_rate": 2.0649233543733094e-05,
      "loss": 1.685,
      "step": 230
    },
    {
      "epoch": 0.0627790556081721,
      "grad_norm": 0.6729004979133606,
      "learning_rate": 2.0829576194770065e-05,
      "loss": 1.6961,
      "step": 232
    },
    {
      "epoch": 0.06332025436341496,
      "grad_norm": 0.7335099577903748,
      "learning_rate": 2.1009918845807033e-05,
      "loss": 1.6797,
      "step": 234
    },
    {
      "epoch": 0.06386145311865783,
      "grad_norm": 0.6398060321807861,
      "learning_rate": 2.1190261496844003e-05,
      "loss": 1.7037,
      "step": 236
    },
    {
      "epoch": 0.0644026518739007,
      "grad_norm": 0.7026365399360657,
      "learning_rate": 2.1370604147880974e-05,
      "loss": 1.6698,
      "step": 238
    },
    {
      "epoch": 0.06494385062914355,
      "grad_norm": 0.7972332239151001,
      "learning_rate": 2.1550946798917945e-05,
      "loss": 1.6866,
      "step": 240
    },
    {
      "epoch": 0.06548504938438642,
      "grad_norm": 0.7363021969795227,
      "learning_rate": 2.1731289449954913e-05,
      "loss": 1.6879,
      "step": 242
    },
    {
      "epoch": 0.06602624813962928,
      "grad_norm": 0.7071017026901245,
      "learning_rate": 2.1911632100991887e-05,
      "loss": 1.6898,
      "step": 244
    },
    {
      "epoch": 0.06656744689487214,
      "grad_norm": 0.8030880093574524,
      "learning_rate": 2.2091974752028858e-05,
      "loss": 1.6734,
      "step": 246
    },
    {
      "epoch": 0.067108645650115,
      "grad_norm": 0.7429569363594055,
      "learning_rate": 2.2272317403065825e-05,
      "loss": 1.6722,
      "step": 248
    },
    {
      "epoch": 0.06764984440535787,
      "grad_norm": 0.6807804107666016,
      "learning_rate": 2.2452660054102796e-05,
      "loss": 1.6697,
      "step": 250
    },
    {
      "epoch": 0.06819104316060073,
      "grad_norm": 0.6632562875747681,
      "learning_rate": 2.2633002705139767e-05,
      "loss": 1.6453,
      "step": 252
    },
    {
      "epoch": 0.0687322419158436,
      "grad_norm": 0.6661680340766907,
      "learning_rate": 2.2813345356176738e-05,
      "loss": 1.6701,
      "step": 254
    },
    {
      "epoch": 0.06927344067108646,
      "grad_norm": 0.6747105121612549,
      "learning_rate": 2.2993688007213706e-05,
      "loss": 1.6729,
      "step": 256
    },
    {
      "epoch": 0.06981463942632932,
      "grad_norm": 0.7698473334312439,
      "learning_rate": 2.317403065825068e-05,
      "loss": 1.6528,
      "step": 258
    },
    {
      "epoch": 0.07035583818157218,
      "grad_norm": 0.6111325621604919,
      "learning_rate": 2.3354373309287647e-05,
      "loss": 1.6412,
      "step": 260
    },
    {
      "epoch": 0.07089703693681504,
      "grad_norm": 0.7405019998550415,
      "learning_rate": 2.3534715960324618e-05,
      "loss": 1.6564,
      "step": 262
    },
    {
      "epoch": 0.07143823569205791,
      "grad_norm": 0.6702501773834229,
      "learning_rate": 2.371505861136159e-05,
      "loss": 1.654,
      "step": 264
    },
    {
      "epoch": 0.07197943444730077,
      "grad_norm": 0.7076373100280762,
      "learning_rate": 2.389540126239856e-05,
      "loss": 1.6301,
      "step": 266
    },
    {
      "epoch": 0.07252063320254364,
      "grad_norm": 0.7239627242088318,
      "learning_rate": 2.4075743913435528e-05,
      "loss": 1.6575,
      "step": 268
    },
    {
      "epoch": 0.0730618319577865,
      "grad_norm": 0.753480076789856,
      "learning_rate": 2.42560865644725e-05,
      "loss": 1.6603,
      "step": 270
    },
    {
      "epoch": 0.07360303071302936,
      "grad_norm": 0.7261641025543213,
      "learning_rate": 2.443642921550947e-05,
      "loss": 1.6449,
      "step": 272
    },
    {
      "epoch": 0.07414422946827222,
      "grad_norm": 0.6315119862556458,
      "learning_rate": 2.461677186654644e-05,
      "loss": 1.6538,
      "step": 274
    },
    {
      "epoch": 0.07468542822351508,
      "grad_norm": 0.5698412656784058,
      "learning_rate": 2.4797114517583408e-05,
      "loss": 1.6663,
      "step": 276
    },
    {
      "epoch": 0.07522662697875795,
      "grad_norm": 0.5968983173370361,
      "learning_rate": 2.497745716862038e-05,
      "loss": 1.643,
      "step": 278
    },
    {
      "epoch": 0.07576782573400082,
      "grad_norm": 0.561126172542572,
      "learning_rate": 2.5157799819657353e-05,
      "loss": 1.6301,
      "step": 280
    },
    {
      "epoch": 0.07630902448924368,
      "grad_norm": 0.7290865778923035,
      "learning_rate": 2.533814247069432e-05,
      "loss": 1.6412,
      "step": 282
    },
    {
      "epoch": 0.07685022324448654,
      "grad_norm": 0.7629122138023376,
      "learning_rate": 2.5518485121731288e-05,
      "loss": 1.6335,
      "step": 284
    },
    {
      "epoch": 0.0773914219997294,
      "grad_norm": 0.5383496284484863,
      "learning_rate": 2.5698827772768262e-05,
      "loss": 1.6226,
      "step": 286
    },
    {
      "epoch": 0.07793262075497226,
      "grad_norm": 0.7778373956680298,
      "learning_rate": 2.5879170423805233e-05,
      "loss": 1.6333,
      "step": 288
    },
    {
      "epoch": 0.07847381951021512,
      "grad_norm": 0.6851366758346558,
      "learning_rate": 2.60595130748422e-05,
      "loss": 1.6251,
      "step": 290
    },
    {
      "epoch": 0.079015018265458,
      "grad_norm": 0.5947225689888,
      "learning_rate": 2.623985572587917e-05,
      "loss": 1.6298,
      "step": 292
    },
    {
      "epoch": 0.07955621702070086,
      "grad_norm": 0.9742544889450073,
      "learning_rate": 2.6420198376916146e-05,
      "loss": 1.6252,
      "step": 294
    },
    {
      "epoch": 0.08009741577594372,
      "grad_norm": 1.2064323425292969,
      "learning_rate": 2.6600541027953113e-05,
      "loss": 1.6152,
      "step": 296
    },
    {
      "epoch": 0.08063861453118658,
      "grad_norm": 1.0506716966629028,
      "learning_rate": 2.678088367899008e-05,
      "loss": 1.6351,
      "step": 298
    },
    {
      "epoch": 0.08117981328642944,
      "grad_norm": 1.2992738485336304,
      "learning_rate": 2.696122633002705e-05,
      "loss": 1.6193,
      "step": 300
    },
    {
      "epoch": 0.0817210120416723,
      "grad_norm": 1.0616599321365356,
      "learning_rate": 2.7141568981064026e-05,
      "loss": 1.6135,
      "step": 302
    },
    {
      "epoch": 0.08226221079691516,
      "grad_norm": 1.037997841835022,
      "learning_rate": 2.7321911632100993e-05,
      "loss": 1.6344,
      "step": 304
    },
    {
      "epoch": 0.08280340955215804,
      "grad_norm": 0.8937569856643677,
      "learning_rate": 2.7502254283137964e-05,
      "loss": 1.6077,
      "step": 306
    },
    {
      "epoch": 0.0833446083074009,
      "grad_norm": 1.1334234476089478,
      "learning_rate": 2.7682596934174932e-05,
      "loss": 1.6193,
      "step": 308
    },
    {
      "epoch": 0.08388580706264376,
      "grad_norm": 0.8336219191551208,
      "learning_rate": 2.7862939585211906e-05,
      "loss": 1.5948,
      "step": 310
    },
    {
      "epoch": 0.08442700581788662,
      "grad_norm": 1.1825398206710815,
      "learning_rate": 2.8043282236248874e-05,
      "loss": 1.6239,
      "step": 312
    },
    {
      "epoch": 0.08496820457312948,
      "grad_norm": 0.7945433259010315,
      "learning_rate": 2.8223624887285844e-05,
      "loss": 1.6119,
      "step": 314
    },
    {
      "epoch": 0.08550940332837234,
      "grad_norm": 0.6971009969711304,
      "learning_rate": 2.8403967538322812e-05,
      "loss": 1.5822,
      "step": 316
    },
    {
      "epoch": 0.0860506020836152,
      "grad_norm": 0.6050766706466675,
      "learning_rate": 2.8584310189359786e-05,
      "loss": 1.6161,
      "step": 318
    },
    {
      "epoch": 0.08659180083885808,
      "grad_norm": 0.6123189330101013,
      "learning_rate": 2.8764652840396754e-05,
      "loss": 1.5941,
      "step": 320
    },
    {
      "epoch": 0.08713299959410094,
      "grad_norm": 0.5471253395080566,
      "learning_rate": 2.8944995491433725e-05,
      "loss": 1.603,
      "step": 322
    },
    {
      "epoch": 0.0876741983493438,
      "grad_norm": 0.5793882608413696,
      "learning_rate": 2.91253381424707e-05,
      "loss": 1.6076,
      "step": 324
    },
    {
      "epoch": 0.08821539710458666,
      "grad_norm": 0.5409413576126099,
      "learning_rate": 2.9305680793507666e-05,
      "loss": 1.5825,
      "step": 326
    },
    {
      "epoch": 0.08875659585982952,
      "grad_norm": 6.757148265838623,
      "learning_rate": 2.9486023444544637e-05,
      "loss": 1.5942,
      "step": 328
    },
    {
      "epoch": 0.08929779461507238,
      "grad_norm": 1.3357856273651123,
      "learning_rate": 2.9666366095581605e-05,
      "loss": 1.642,
      "step": 330
    },
    {
      "epoch": 0.08983899337031524,
      "grad_norm": 0.8245829939842224,
      "learning_rate": 2.984670874661858e-05,
      "loss": 1.6062,
      "step": 332
    },
    {
      "epoch": 0.09038019212555812,
      "grad_norm": 0.8888993263244629,
      "learning_rate": 3.0027051397655547e-05,
      "loss": 1.5952,
      "step": 334
    },
    {
      "epoch": 0.09092139088080098,
      "grad_norm": 0.8923915028572083,
      "learning_rate": 3.0207394048692517e-05,
      "loss": 1.5977,
      "step": 336
    },
    {
      "epoch": 0.09146258963604384,
      "grad_norm": 0.7443459033966064,
      "learning_rate": 3.0387736699729485e-05,
      "loss": 1.5738,
      "step": 338
    },
    {
      "epoch": 0.0920037883912867,
      "grad_norm": 0.7297430038452148,
      "learning_rate": 3.056807935076646e-05,
      "loss": 1.5907,
      "step": 340
    },
    {
      "epoch": 0.09254498714652956,
      "grad_norm": 0.6882812976837158,
      "learning_rate": 3.074842200180343e-05,
      "loss": 1.5767,
      "step": 342
    },
    {
      "epoch": 0.09308618590177242,
      "grad_norm": 0.6150392889976501,
      "learning_rate": 3.0928764652840394e-05,
      "loss": 1.5747,
      "step": 344
    },
    {
      "epoch": 0.09362738465701528,
      "grad_norm": 0.6230599284172058,
      "learning_rate": 3.110910730387737e-05,
      "loss": 1.583,
      "step": 346
    },
    {
      "epoch": 0.09416858341225816,
      "grad_norm": 0.6081874966621399,
      "learning_rate": 3.128944995491434e-05,
      "loss": 1.5875,
      "step": 348
    },
    {
      "epoch": 0.09470978216750102,
      "grad_norm": 0.5467821955680847,
      "learning_rate": 3.146979260595131e-05,
      "loss": 1.575,
      "step": 350
    },
    {
      "epoch": 0.09525098092274388,
      "grad_norm": 0.5629361271858215,
      "learning_rate": 3.165013525698828e-05,
      "loss": 1.5828,
      "step": 352
    },
    {
      "epoch": 0.09579217967798674,
      "grad_norm": 0.5995283126831055,
      "learning_rate": 3.1830477908025245e-05,
      "loss": 1.5872,
      "step": 354
    },
    {
      "epoch": 0.0963333784332296,
      "grad_norm": 0.556450366973877,
      "learning_rate": 3.201082055906222e-05,
      "loss": 1.553,
      "step": 356
    },
    {
      "epoch": 0.09687457718847246,
      "grad_norm": 0.6498537063598633,
      "learning_rate": 3.219116321009919e-05,
      "loss": 1.5667,
      "step": 358
    },
    {
      "epoch": 0.09741577594371532,
      "grad_norm": 0.5891172885894775,
      "learning_rate": 3.237150586113616e-05,
      "loss": 1.5818,
      "step": 360
    },
    {
      "epoch": 0.0979569746989582,
      "grad_norm": 0.6487797498703003,
      "learning_rate": 3.2551848512173136e-05,
      "loss": 1.5582,
      "step": 362
    },
    {
      "epoch": 0.09849817345420106,
      "grad_norm": 0.5860658884048462,
      "learning_rate": 3.27321911632101e-05,
      "loss": 1.5725,
      "step": 364
    },
    {
      "epoch": 0.09903937220944392,
      "grad_norm": 0.5619581937789917,
      "learning_rate": 3.291253381424707e-05,
      "loss": 1.5779,
      "step": 366
    },
    {
      "epoch": 0.09958057096468678,
      "grad_norm": 0.7147429585456848,
      "learning_rate": 3.309287646528404e-05,
      "loss": 1.5766,
      "step": 368
    },
    {
      "epoch": 0.10012176971992964,
      "grad_norm": 0.5840562582015991,
      "learning_rate": 3.327321911632101e-05,
      "loss": 1.5609,
      "step": 370
    },
    {
      "epoch": 0.1006629684751725,
      "grad_norm": 0.6277860403060913,
      "learning_rate": 3.345356176735798e-05,
      "loss": 1.5645,
      "step": 372
    },
    {
      "epoch": 0.10120416723041536,
      "grad_norm": 0.6395567655563354,
      "learning_rate": 3.3633904418394954e-05,
      "loss": 1.545,
      "step": 374
    },
    {
      "epoch": 0.10174536598565824,
      "grad_norm": 0.6651553511619568,
      "learning_rate": 3.381424706943192e-05,
      "loss": 1.5643,
      "step": 376
    },
    {
      "epoch": 0.1022865647409011,
      "grad_norm": 0.6691033244132996,
      "learning_rate": 3.3994589720468896e-05,
      "loss": 1.5705,
      "step": 378
    },
    {
      "epoch": 0.10282776349614396,
      "grad_norm": 0.5426511764526367,
      "learning_rate": 3.4174932371505863e-05,
      "loss": 1.536,
      "step": 380
    },
    {
      "epoch": 0.10336896225138682,
      "grad_norm": 0.6677694916725159,
      "learning_rate": 3.435527502254283e-05,
      "loss": 1.5664,
      "step": 382
    },
    {
      "epoch": 0.10391016100662968,
      "grad_norm": 0.5283762216567993,
      "learning_rate": 3.45356176735798e-05,
      "loss": 1.5474,
      "step": 384
    },
    {
      "epoch": 0.10445135976187254,
      "grad_norm": 0.652812659740448,
      "learning_rate": 3.471596032461677e-05,
      "loss": 1.5509,
      "step": 386
    },
    {
      "epoch": 0.1049925585171154,
      "grad_norm": 0.8639987111091614,
      "learning_rate": 3.489630297565375e-05,
      "loss": 1.5563,
      "step": 388
    },
    {
      "epoch": 0.10553375727235827,
      "grad_norm": 0.7726946473121643,
      "learning_rate": 3.5076645626690715e-05,
      "loss": 1.5682,
      "step": 390
    },
    {
      "epoch": 0.10607495602760114,
      "grad_norm": 0.6511155962944031,
      "learning_rate": 3.525698827772768e-05,
      "loss": 1.5571,
      "step": 392
    },
    {
      "epoch": 0.106616154782844,
      "grad_norm": 0.6578395962715149,
      "learning_rate": 3.5437330928764656e-05,
      "loss": 1.5452,
      "step": 394
    },
    {
      "epoch": 0.10715735353808686,
      "grad_norm": 0.642919659614563,
      "learning_rate": 3.5617673579801624e-05,
      "loss": 1.5508,
      "step": 396
    },
    {
      "epoch": 0.10769855229332972,
      "grad_norm": 0.5190348029136658,
      "learning_rate": 3.579801623083859e-05,
      "loss": 1.5432,
      "step": 398
    },
    {
      "epoch": 0.10823975104857259,
      "grad_norm": 0.48932549357414246,
      "learning_rate": 3.5978358881875566e-05,
      "loss": 1.5544,
      "step": 400
    },
    {
      "epoch": 0.10878094980381545,
      "grad_norm": 0.5018340945243835,
      "learning_rate": 3.615870153291254e-05,
      "loss": 1.5322,
      "step": 402
    },
    {
      "epoch": 0.10932214855905831,
      "grad_norm": 0.5701499581336975,
      "learning_rate": 3.633904418394951e-05,
      "loss": 1.5288,
      "step": 404
    },
    {
      "epoch": 0.10986334731430118,
      "grad_norm": 0.6049205660820007,
      "learning_rate": 3.6519386834986475e-05,
      "loss": 1.5627,
      "step": 406
    },
    {
      "epoch": 0.11040454606954404,
      "grad_norm": 0.5781517028808594,
      "learning_rate": 3.669972948602345e-05,
      "loss": 1.542,
      "step": 408
    },
    {
      "epoch": 0.1109457448247869,
      "grad_norm": 0.5594660043716431,
      "learning_rate": 3.688007213706042e-05,
      "loss": 1.5461,
      "step": 410
    },
    {
      "epoch": 0.11148694358002977,
      "grad_norm": 0.5319619178771973,
      "learning_rate": 3.7060414788097384e-05,
      "loss": 1.5668,
      "step": 412
    },
    {
      "epoch": 0.11202814233527263,
      "grad_norm": 0.5311123728752136,
      "learning_rate": 3.724075743913435e-05,
      "loss": 1.528,
      "step": 414
    },
    {
      "epoch": 0.11256934109051549,
      "grad_norm": 0.5555101633071899,
      "learning_rate": 3.7421100090171326e-05,
      "loss": 1.5392,
      "step": 416
    },
    {
      "epoch": 0.11311053984575835,
      "grad_norm": 0.5486223101615906,
      "learning_rate": 3.76014427412083e-05,
      "loss": 1.5337,
      "step": 418
    },
    {
      "epoch": 0.11365173860100122,
      "grad_norm": 0.5156669020652771,
      "learning_rate": 3.778178539224527e-05,
      "loss": 1.5105,
      "step": 420
    },
    {
      "epoch": 0.11419293735624408,
      "grad_norm": 0.49596554040908813,
      "learning_rate": 3.7962128043282235e-05,
      "loss": 1.515,
      "step": 422
    },
    {
      "epoch": 0.11473413611148695,
      "grad_norm": 0.641333281993866,
      "learning_rate": 3.814247069431921e-05,
      "loss": 1.5328,
      "step": 424
    },
    {
      "epoch": 0.1152753348667298,
      "grad_norm": 0.6106113195419312,
      "learning_rate": 3.832281334535618e-05,
      "loss": 1.5189,
      "step": 426
    },
    {
      "epoch": 0.11581653362197267,
      "grad_norm": 0.5619134306907654,
      "learning_rate": 3.8503155996393145e-05,
      "loss": 1.5295,
      "step": 428
    },
    {
      "epoch": 0.11635773237721553,
      "grad_norm": 0.5396978259086609,
      "learning_rate": 3.868349864743012e-05,
      "loss": 1.5173,
      "step": 430
    },
    {
      "epoch": 0.11689893113245839,
      "grad_norm": 0.5466894507408142,
      "learning_rate": 3.886384129846709e-05,
      "loss": 1.5191,
      "step": 432
    },
    {
      "epoch": 0.11744012988770126,
      "grad_norm": 0.5601218342781067,
      "learning_rate": 3.904418394950406e-05,
      "loss": 1.5285,
      "step": 434
    },
    {
      "epoch": 0.11798132864294412,
      "grad_norm": 0.6620492935180664,
      "learning_rate": 3.922452660054103e-05,
      "loss": 1.4946,
      "step": 436
    },
    {
      "epoch": 0.11852252739818699,
      "grad_norm": 0.49140048027038574,
      "learning_rate": 3.9404869251578e-05,
      "loss": 1.512,
      "step": 438
    },
    {
      "epoch": 0.11906372615342985,
      "grad_norm": 0.5824118256568909,
      "learning_rate": 3.958521190261497e-05,
      "loss": 1.5244,
      "step": 440
    },
    {
      "epoch": 0.11960492490867271,
      "grad_norm": 0.4967150092124939,
      "learning_rate": 3.976555455365194e-05,
      "loss": 1.5273,
      "step": 442
    },
    {
      "epoch": 0.12014612366391557,
      "grad_norm": 0.5089767575263977,
      "learning_rate": 3.994589720468891e-05,
      "loss": 1.5119,
      "step": 444
    },
    {
      "epoch": 0.12068732241915843,
      "grad_norm": 0.5404312014579773,
      "learning_rate": 4.0126239855725886e-05,
      "loss": 1.5072,
      "step": 446
    },
    {
      "epoch": 0.1212285211744013,
      "grad_norm": 0.5239550471305847,
      "learning_rate": 4.0306582506762853e-05,
      "loss": 1.5336,
      "step": 448
    },
    {
      "epoch": 0.12176971992964417,
      "grad_norm": 0.4974781274795532,
      "learning_rate": 4.048692515779982e-05,
      "loss": 1.5225,
      "step": 450
    },
    {
      "epoch": 0.12231091868488703,
      "grad_norm": 0.5363791584968567,
      "learning_rate": 4.066726780883679e-05,
      "loss": 1.5176,
      "step": 452
    },
    {
      "epoch": 0.12285211744012989,
      "grad_norm": 0.5095157027244568,
      "learning_rate": 4.084761045987376e-05,
      "loss": 1.4936,
      "step": 454
    },
    {
      "epoch": 0.12339331619537275,
      "grad_norm": 0.4920356869697571,
      "learning_rate": 4.102795311091073e-05,
      "loss": 1.5269,
      "step": 456
    },
    {
      "epoch": 0.12393451495061561,
      "grad_norm": 0.4940793514251709,
      "learning_rate": 4.1208295761947705e-05,
      "loss": 1.5072,
      "step": 458
    },
    {
      "epoch": 0.12447571370585847,
      "grad_norm": 0.4805227220058441,
      "learning_rate": 4.138863841298467e-05,
      "loss": 1.4987,
      "step": 460
    },
    {
      "epoch": 0.12501691246110133,
      "grad_norm": 0.49683934450149536,
      "learning_rate": 4.1568981064021646e-05,
      "loss": 1.5008,
      "step": 462
    },
    {
      "epoch": 0.1255581112163442,
      "grad_norm": 0.5283801555633545,
      "learning_rate": 4.1749323715058614e-05,
      "loss": 1.5177,
      "step": 464
    },
    {
      "epoch": 0.12609930997158705,
      "grad_norm": 0.5395119190216064,
      "learning_rate": 4.192966636609558e-05,
      "loss": 1.5106,
      "step": 466
    },
    {
      "epoch": 0.12664050872682991,
      "grad_norm": 0.5403693914413452,
      "learning_rate": 4.211000901713255e-05,
      "loss": 1.4854,
      "step": 468
    },
    {
      "epoch": 0.1271817074820728,
      "grad_norm": 0.4690951406955719,
      "learning_rate": 4.229035166816952e-05,
      "loss": 1.5079,
      "step": 470
    },
    {
      "epoch": 0.12772290623731566,
      "grad_norm": 0.5077293515205383,
      "learning_rate": 4.24706943192065e-05,
      "loss": 1.4953,
      "step": 472
    },
    {
      "epoch": 0.12826410499255853,
      "grad_norm": 0.440019816160202,
      "learning_rate": 4.2651036970243465e-05,
      "loss": 1.4864,
      "step": 474
    },
    {
      "epoch": 0.1288053037478014,
      "grad_norm": 0.48672759532928467,
      "learning_rate": 4.283137962128044e-05,
      "loss": 1.5205,
      "step": 476
    },
    {
      "epoch": 0.12934650250304425,
      "grad_norm": 0.4732811450958252,
      "learning_rate": 4.301172227231741e-05,
      "loss": 1.4998,
      "step": 478
    },
    {
      "epoch": 0.1298877012582871,
      "grad_norm": 0.46713048219680786,
      "learning_rate": 4.3192064923354374e-05,
      "loss": 1.4893,
      "step": 480
    },
    {
      "epoch": 0.13042890001352997,
      "grad_norm": 0.502356231212616,
      "learning_rate": 4.337240757439134e-05,
      "loss": 1.5125,
      "step": 482
    },
    {
      "epoch": 0.13097009876877283,
      "grad_norm": 0.45067864656448364,
      "learning_rate": 4.3552750225428316e-05,
      "loss": 1.4978,
      "step": 484
    },
    {
      "epoch": 0.1315112975240157,
      "grad_norm": 0.46964120864868164,
      "learning_rate": 4.373309287646529e-05,
      "loss": 1.5006,
      "step": 486
    },
    {
      "epoch": 0.13205249627925855,
      "grad_norm": 0.47723180055618286,
      "learning_rate": 4.391343552750226e-05,
      "loss": 1.513,
      "step": 488
    },
    {
      "epoch": 0.1325936950345014,
      "grad_norm": 0.5100542306900024,
      "learning_rate": 4.4093778178539225e-05,
      "loss": 1.5279,
      "step": 490
    },
    {
      "epoch": 0.13313489378974427,
      "grad_norm": 0.5344257354736328,
      "learning_rate": 4.42741208295762e-05,
      "loss": 1.5193,
      "step": 492
    },
    {
      "epoch": 0.13367609254498714,
      "grad_norm": 0.5867893695831299,
      "learning_rate": 4.445446348061317e-05,
      "loss": 1.512,
      "step": 494
    },
    {
      "epoch": 0.13421729130023,
      "grad_norm": 0.7811394929885864,
      "learning_rate": 4.4634806131650134e-05,
      "loss": 1.5038,
      "step": 496
    },
    {
      "epoch": 0.13475849005547288,
      "grad_norm": 0.8505339622497559,
      "learning_rate": 4.48151487826871e-05,
      "loss": 1.5169,
      "step": 498
    },
    {
      "epoch": 0.13529968881071575,
      "grad_norm": 0.6337641477584839,
      "learning_rate": 4.4995491433724076e-05,
      "loss": 1.4951,
      "step": 500
    },
    {
      "epoch": 0.1358408875659586,
      "grad_norm": 0.7979961633682251,
      "learning_rate": 4.517583408476105e-05,
      "loss": 1.5031,
      "step": 502
    },
    {
      "epoch": 0.13638208632120147,
      "grad_norm": 0.6946894526481628,
      "learning_rate": 4.535617673579802e-05,
      "loss": 1.501,
      "step": 504
    },
    {
      "epoch": 0.13692328507644433,
      "grad_norm": 0.6830259561538696,
      "learning_rate": 4.5536519386834986e-05,
      "loss": 1.4896,
      "step": 506
    },
    {
      "epoch": 0.1374644838316872,
      "grad_norm": 0.5908662676811218,
      "learning_rate": 4.571686203787196e-05,
      "loss": 1.4992,
      "step": 508
    },
    {
      "epoch": 0.13800568258693005,
      "grad_norm": 0.7655865550041199,
      "learning_rate": 4.589720468890893e-05,
      "loss": 1.4911,
      "step": 510
    },
    {
      "epoch": 0.1385468813421729,
      "grad_norm": 0.5924785733222961,
      "learning_rate": 4.6077547339945895e-05,
      "loss": 1.4719,
      "step": 512
    },
    {
      "epoch": 0.13908808009741577,
      "grad_norm": 0.6654263138771057,
      "learning_rate": 4.625788999098287e-05,
      "loss": 1.5109,
      "step": 514
    },
    {
      "epoch": 0.13962927885265863,
      "grad_norm": 0.5296297073364258,
      "learning_rate": 4.6438232642019843e-05,
      "loss": 1.4934,
      "step": 516
    },
    {
      "epoch": 0.1401704776079015,
      "grad_norm": 0.5698690414428711,
      "learning_rate": 4.661857529305681e-05,
      "loss": 1.4954,
      "step": 518
    },
    {
      "epoch": 0.14071167636314436,
      "grad_norm": 0.5790325403213501,
      "learning_rate": 4.679891794409378e-05,
      "loss": 1.4673,
      "step": 520
    },
    {
      "epoch": 0.14125287511838722,
      "grad_norm": 0.551480770111084,
      "learning_rate": 4.697926059513075e-05,
      "loss": 1.476,
      "step": 522
    },
    {
      "epoch": 0.14179407387363008,
      "grad_norm": 0.5201780796051025,
      "learning_rate": 4.715960324616772e-05,
      "loss": 1.4701,
      "step": 524
    },
    {
      "epoch": 0.14233527262887297,
      "grad_norm": 0.46442562341690063,
      "learning_rate": 4.733994589720469e-05,
      "loss": 1.4831,
      "step": 526
    },
    {
      "epoch": 0.14287647138411583,
      "grad_norm": 0.5558522939682007,
      "learning_rate": 4.752028854824166e-05,
      "loss": 1.4729,
      "step": 528
    },
    {
      "epoch": 0.1434176701393587,
      "grad_norm": 0.48511791229248047,
      "learning_rate": 4.7700631199278636e-05,
      "loss": 1.4742,
      "step": 530
    },
    {
      "epoch": 0.14395886889460155,
      "grad_norm": 0.5244829058647156,
      "learning_rate": 4.7880973850315604e-05,
      "loss": 1.4928,
      "step": 532
    },
    {
      "epoch": 0.1445000676498444,
      "grad_norm": 0.48878946900367737,
      "learning_rate": 4.806131650135257e-05,
      "loss": 1.4921,
      "step": 534
    },
    {
      "epoch": 0.14504126640508727,
      "grad_norm": 0.5348760485649109,
      "learning_rate": 4.824165915238954e-05,
      "loss": 1.4917,
      "step": 536
    },
    {
      "epoch": 0.14558246516033013,
      "grad_norm": 0.5444923639297485,
      "learning_rate": 4.842200180342651e-05,
      "loss": 1.4546,
      "step": 538
    },
    {
      "epoch": 0.146123663915573,
      "grad_norm": 0.494761198759079,
      "learning_rate": 4.860234445446348e-05,
      "loss": 1.4751,
      "step": 540
    },
    {
      "epoch": 0.14666486267081585,
      "grad_norm": 0.4921441674232483,
      "learning_rate": 4.8782687105500455e-05,
      "loss": 1.4767,
      "step": 542
    },
    {
      "epoch": 0.14720606142605872,
      "grad_norm": 0.48382577300071716,
      "learning_rate": 4.896302975653742e-05,
      "loss": 1.485,
      "step": 544
    },
    {
      "epoch": 0.14774726018130158,
      "grad_norm": 0.4616708755493164,
      "learning_rate": 4.9143372407574397e-05,
      "loss": 1.4732,
      "step": 546
    },
    {
      "epoch": 0.14828845893654444,
      "grad_norm": 0.5030043125152588,
      "learning_rate": 4.9323715058611364e-05,
      "loss": 1.4799,
      "step": 548
    },
    {
      "epoch": 0.1488296576917873,
      "grad_norm": 0.467230886220932,
      "learning_rate": 4.950405770964833e-05,
      "loss": 1.4594,
      "step": 550
    },
    {
      "epoch": 0.14937085644703016,
      "grad_norm": 0.42864304780960083,
      "learning_rate": 4.9684400360685306e-05,
      "loss": 1.4748,
      "step": 552
    },
    {
      "epoch": 0.14991205520227305,
      "grad_norm": 0.43733683228492737,
      "learning_rate": 4.986474301172227e-05,
      "loss": 1.462,
      "step": 554
    },
    {
      "epoch": 0.1504532539575159,
      "grad_norm": 0.45550286769866943,
      "learning_rate": 5.004508566275925e-05,
      "loss": 1.475,
      "step": 556
    },
    {
      "epoch": 0.15099445271275877,
      "grad_norm": 0.44999995827674866,
      "learning_rate": 5.022542831379622e-05,
      "loss": 1.4794,
      "step": 558
    },
    {
      "epoch": 0.15153565146800163,
      "grad_norm": 0.5035279989242554,
      "learning_rate": 5.040577096483319e-05,
      "loss": 1.471,
      "step": 560
    },
    {
      "epoch": 0.1520768502232445,
      "grad_norm": 0.44605591893196106,
      "learning_rate": 5.058611361587016e-05,
      "loss": 1.4461,
      "step": 562
    },
    {
      "epoch": 0.15261804897848735,
      "grad_norm": 0.5482723712921143,
      "learning_rate": 5.0766456266907124e-05,
      "loss": 1.4597,
      "step": 564
    },
    {
      "epoch": 0.1531592477337302,
      "grad_norm": 0.5323627591133118,
      "learning_rate": 5.094679891794409e-05,
      "loss": 1.4743,
      "step": 566
    },
    {
      "epoch": 0.15370044648897307,
      "grad_norm": 0.5289944410324097,
      "learning_rate": 5.1127141568981066e-05,
      "loss": 1.5,
      "step": 568
    },
    {
      "epoch": 0.15424164524421594,
      "grad_norm": 0.5446243286132812,
      "learning_rate": 5.1307484220018034e-05,
      "loss": 1.4751,
      "step": 570
    },
    {
      "epoch": 0.1547828439994588,
      "grad_norm": 0.525830090045929,
      "learning_rate": 5.1487826871055015e-05,
      "loss": 1.4639,
      "step": 572
    },
    {
      "epoch": 0.15532404275470166,
      "grad_norm": 0.48129191994667053,
      "learning_rate": 5.166816952209198e-05,
      "loss": 1.4652,
      "step": 574
    },
    {
      "epoch": 0.15586524150994452,
      "grad_norm": 0.47915297746658325,
      "learning_rate": 5.184851217312895e-05,
      "loss": 1.4627,
      "step": 576
    },
    {
      "epoch": 0.15640644026518738,
      "grad_norm": 0.5229325294494629,
      "learning_rate": 5.202885482416592e-05,
      "loss": 1.4525,
      "step": 578
    },
    {
      "epoch": 0.15694763902043024,
      "grad_norm": 0.5452600121498108,
      "learning_rate": 5.2209197475202885e-05,
      "loss": 1.458,
      "step": 580
    },
    {
      "epoch": 0.15748883777567313,
      "grad_norm": 0.427432656288147,
      "learning_rate": 5.238954012623985e-05,
      "loss": 1.4773,
      "step": 582
    },
    {
      "epoch": 0.158030036530916,
      "grad_norm": 0.450712114572525,
      "learning_rate": 5.2569882777276827e-05,
      "loss": 1.469,
      "step": 584
    },
    {
      "epoch": 0.15857123528615885,
      "grad_norm": 0.5500516891479492,
      "learning_rate": 5.27502254283138e-05,
      "loss": 1.4603,
      "step": 586
    },
    {
      "epoch": 0.1591124340414017,
      "grad_norm": 0.457157164812088,
      "learning_rate": 5.2930568079350775e-05,
      "loss": 1.4785,
      "step": 588
    },
    {
      "epoch": 0.15965363279664457,
      "grad_norm": 0.49750396609306335,
      "learning_rate": 5.311091073038774e-05,
      "loss": 1.4603,
      "step": 590
    },
    {
      "epoch": 0.16019483155188743,
      "grad_norm": 0.5720525979995728,
      "learning_rate": 5.329125338142471e-05,
      "loss": 1.4753,
      "step": 592
    },
    {
      "epoch": 0.1607360303071303,
      "grad_norm": 0.4425548315048218,
      "learning_rate": 5.347159603246168e-05,
      "loss": 1.462,
      "step": 594
    },
    {
      "epoch": 0.16127722906237316,
      "grad_norm": 0.5064132809638977,
      "learning_rate": 5.3651938683498645e-05,
      "loss": 1.4596,
      "step": 596
    },
    {
      "epoch": 0.16181842781761602,
      "grad_norm": 0.518460750579834,
      "learning_rate": 5.383228133453562e-05,
      "loss": 1.4763,
      "step": 598
    },
    {
      "epoch": 0.16235962657285888,
      "grad_norm": 0.4613576829433441,
      "learning_rate": 5.401262398557259e-05,
      "loss": 1.4487,
      "step": 600
    },
    {
      "epoch": 0.16290082532810174,
      "grad_norm": 0.7046213746070862,
      "learning_rate": 5.419296663660957e-05,
      "loss": 1.472,
      "step": 602
    },
    {
      "epoch": 0.1634420240833446,
      "grad_norm": 0.6164196133613586,
      "learning_rate": 5.4373309287646535e-05,
      "loss": 1.4424,
      "step": 604
    },
    {
      "epoch": 0.16398322283858746,
      "grad_norm": 0.5106020569801331,
      "learning_rate": 5.45536519386835e-05,
      "loss": 1.4567,
      "step": 606
    },
    {
      "epoch": 0.16452442159383032,
      "grad_norm": 0.4291236400604248,
      "learning_rate": 5.473399458972047e-05,
      "loss": 1.4514,
      "step": 608
    },
    {
      "epoch": 0.16506562034907318,
      "grad_norm": 0.46577414870262146,
      "learning_rate": 5.491433724075744e-05,
      "loss": 1.4408,
      "step": 610
    },
    {
      "epoch": 0.16560681910431607,
      "grad_norm": 0.4729917049407959,
      "learning_rate": 5.509467989179441e-05,
      "loss": 1.4493,
      "step": 612
    },
    {
      "epoch": 0.16614801785955893,
      "grad_norm": 0.4651925563812256,
      "learning_rate": 5.527502254283138e-05,
      "loss": 1.465,
      "step": 614
    },
    {
      "epoch": 0.1666892166148018,
      "grad_norm": 0.4756859540939331,
      "learning_rate": 5.545536519386835e-05,
      "loss": 1.4641,
      "step": 616
    },
    {
      "epoch": 0.16723041537004465,
      "grad_norm": 0.42555975914001465,
      "learning_rate": 5.563570784490533e-05,
      "loss": 1.4569,
      "step": 618
    },
    {
      "epoch": 0.16777161412528752,
      "grad_norm": 0.5162522196769714,
      "learning_rate": 5.5816050495942296e-05,
      "loss": 1.4344,
      "step": 620
    },
    {
      "epoch": 0.16831281288053038,
      "grad_norm": 0.5867063999176025,
      "learning_rate": 5.599639314697926e-05,
      "loss": 1.4647,
      "step": 622
    },
    {
      "epoch": 0.16885401163577324,
      "grad_norm": 0.6629165410995483,
      "learning_rate": 5.617673579801623e-05,
      "loss": 1.473,
      "step": 624
    },
    {
      "epoch": 0.1693952103910161,
      "grad_norm": 0.5905330777168274,
      "learning_rate": 5.6357078449053205e-05,
      "loss": 1.4459,
      "step": 626
    },
    {
      "epoch": 0.16993640914625896,
      "grad_norm": 0.7457858324050903,
      "learning_rate": 5.653742110009017e-05,
      "loss": 1.4603,
      "step": 628
    },
    {
      "epoch": 0.17047760790150182,
      "grad_norm": 0.5977684855461121,
      "learning_rate": 5.671776375112714e-05,
      "loss": 1.4621,
      "step": 630
    },
    {
      "epoch": 0.17101880665674468,
      "grad_norm": 0.7097992897033691,
      "learning_rate": 5.689810640216412e-05,
      "loss": 1.4646,
      "step": 632
    },
    {
      "epoch": 0.17156000541198754,
      "grad_norm": 0.5895450711250305,
      "learning_rate": 5.707844905320109e-05,
      "loss": 1.4338,
      "step": 634
    },
    {
      "epoch": 0.1721012041672304,
      "grad_norm": 0.576877772808075,
      "learning_rate": 5.7258791704238056e-05,
      "loss": 1.4666,
      "step": 636
    },
    {
      "epoch": 0.17264240292247326,
      "grad_norm": 0.541110098361969,
      "learning_rate": 5.7439134355275024e-05,
      "loss": 1.4624,
      "step": 638
    },
    {
      "epoch": 0.17318360167771615,
      "grad_norm": 0.5172320604324341,
      "learning_rate": 5.7619477006312e-05,
      "loss": 1.473,
      "step": 640
    },
    {
      "epoch": 0.17372480043295901,
      "grad_norm": 0.47511357069015503,
      "learning_rate": 5.7799819657348965e-05,
      "loss": 1.446,
      "step": 642
    },
    {
      "epoch": 0.17426599918820188,
      "grad_norm": 0.48614808917045593,
      "learning_rate": 5.798016230838593e-05,
      "loss": 1.4394,
      "step": 644
    },
    {
      "epoch": 0.17480719794344474,
      "grad_norm": 0.4435577094554901,
      "learning_rate": 5.81605049594229e-05,
      "loss": 1.43,
      "step": 646
    },
    {
      "epoch": 0.1753483966986876,
      "grad_norm": 0.4458653926849365,
      "learning_rate": 5.834084761045988e-05,
      "loss": 1.46,
      "step": 648
    },
    {
      "epoch": 0.17588959545393046,
      "grad_norm": 0.40675726532936096,
      "learning_rate": 5.852119026149685e-05,
      "loss": 1.4565,
      "step": 650
    },
    {
      "epoch": 0.17643079420917332,
      "grad_norm": 0.4132504165172577,
      "learning_rate": 5.8701532912533817e-05,
      "loss": 1.4522,
      "step": 652
    },
    {
      "epoch": 0.17697199296441618,
      "grad_norm": 0.40881386399269104,
      "learning_rate": 5.888187556357079e-05,
      "loss": 1.4232,
      "step": 654
    },
    {
      "epoch": 0.17751319171965904,
      "grad_norm": 0.40527868270874023,
      "learning_rate": 5.906221821460776e-05,
      "loss": 1.441,
      "step": 656
    },
    {
      "epoch": 0.1780543904749019,
      "grad_norm": 0.40227004885673523,
      "learning_rate": 5.9242560865644726e-05,
      "loss": 1.4259,
      "step": 658
    },
    {
      "epoch": 0.17859558923014476,
      "grad_norm": 0.4043656289577484,
      "learning_rate": 5.942290351668169e-05,
      "loss": 1.4298,
      "step": 660
    },
    {
      "epoch": 0.17913678798538762,
      "grad_norm": 0.4288482666015625,
      "learning_rate": 5.9603246167718674e-05,
      "loss": 1.4439,
      "step": 662
    },
    {
      "epoch": 0.17967798674063049,
      "grad_norm": 0.4385060966014862,
      "learning_rate": 5.978358881875564e-05,
      "loss": 1.4237,
      "step": 664
    },
    {
      "epoch": 0.18021918549587335,
      "grad_norm": 0.396980345249176,
      "learning_rate": 5.996393146979261e-05,
      "loss": 1.4174,
      "step": 666
    },
    {
      "epoch": 0.18076038425111624,
      "grad_norm": 0.4060603678226471,
      "learning_rate": 6.014427412082958e-05,
      "loss": 1.4479,
      "step": 668
    },
    {
      "epoch": 0.1813015830063591,
      "grad_norm": 0.4485025703907013,
      "learning_rate": 6.032461677186655e-05,
      "loss": 1.4493,
      "step": 670
    },
    {
      "epoch": 0.18184278176160196,
      "grad_norm": 0.44034305214881897,
      "learning_rate": 6.050495942290352e-05,
      "loss": 1.4461,
      "step": 672
    },
    {
      "epoch": 0.18238398051684482,
      "grad_norm": 0.418074369430542,
      "learning_rate": 6.0685302073940486e-05,
      "loss": 1.4287,
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.18292517927208768, | |
| "grad_norm": 0.41937318444252014, | |
| "learning_rate": 6.0865644724977454e-05, | |
| "loss": 1.4338, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.18346637802733054, | |
| "grad_norm": 0.4103530943393707, | |
| "learning_rate": 6.104598737601444e-05, | |
| "loss": 1.4391, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.1840075767825734, | |
| "grad_norm": 0.4066039025783539, | |
| "learning_rate": 6.122633002705141e-05, | |
| "loss": 1.4357, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.18454877553781626, | |
| "grad_norm": 0.36903437972068787, | |
| "learning_rate": 6.140667267808838e-05, | |
| "loss": 1.4111, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.18508997429305912, | |
| "grad_norm": 0.37125757336616516, | |
| "learning_rate": 6.158701532912534e-05, | |
| "loss": 1.4233, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.18563117304830198, | |
| "grad_norm": 0.44102513790130615, | |
| "learning_rate": 6.176735798016231e-05, | |
| "loss": 1.4437, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.18617237180354484, | |
| "grad_norm": 0.4337277114391327, | |
| "learning_rate": 6.194770063119928e-05, | |
| "loss": 1.4425, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.1867135705587877, | |
| "grad_norm": 0.37394315004348755, | |
| "learning_rate": 6.212804328223625e-05, | |
| "loss": 1.4452, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.18725476931403057, | |
| "grad_norm": 0.41764944791793823, | |
| "learning_rate": 6.230838593327321e-05, | |
| "loss": 1.4535, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.18779596806927343, | |
| "grad_norm": 0.4214741289615631, | |
| "learning_rate": 6.24887285843102e-05, | |
| "loss": 1.4391, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.18833716682451632, | |
| "grad_norm": 0.4159027338027954, | |
| "learning_rate": 6.266907123534716e-05, | |
| "loss": 1.4197, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.18887836557975918, | |
| "grad_norm": 0.38865673542022705, | |
| "learning_rate": 6.284941388638413e-05, | |
| "loss": 1.4329, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.18941956433500204, | |
| "grad_norm": 0.43646490573883057, | |
| "learning_rate": 6.30297565374211e-05, | |
| "loss": 1.4147, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.1899607630902449, | |
| "grad_norm": 0.41997334361076355, | |
| "learning_rate": 6.321009918845807e-05, | |
| "loss": 1.4275, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.19050196184548776, | |
| "grad_norm": 0.38556602597236633, | |
| "learning_rate": 6.339044183949505e-05, | |
| "loss": 1.4258, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.19104316060073062, | |
| "grad_norm": 0.42955082654953003, | |
| "learning_rate": 6.357078449053201e-05, | |
| "loss": 1.4201, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.19158435935597348, | |
| "grad_norm": 0.3844427764415741, | |
| "learning_rate": 6.3751127141569e-05, | |
| "loss": 1.4448, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.19212555811121634, | |
| "grad_norm": 0.4312956929206848, | |
| "learning_rate": 6.393146979260596e-05, | |
| "loss": 1.4051, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.1926667568664592, | |
| "grad_norm": 0.4556865394115448, | |
| "learning_rate": 6.411181244364293e-05, | |
| "loss": 1.4305, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.19320795562170207, | |
| "grad_norm": 0.37053731083869934, | |
| "learning_rate": 6.42921550946799e-05, | |
| "loss": 1.4301, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.19374915437694493, | |
| "grad_norm": 0.3996010720729828, | |
| "learning_rate": 6.447249774571686e-05, | |
| "loss": 1.4282, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.1942903531321878, | |
| "grad_norm": 0.37610816955566406, | |
| "learning_rate": 6.465284039675383e-05, | |
| "loss": 1.4277, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.19483155188743065, | |
| "grad_norm": 0.3677166998386383, | |
| "learning_rate": 6.48331830477908e-05, | |
| "loss": 1.4029, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.1953727506426735, | |
| "grad_norm": 0.3841564357280731, | |
| "learning_rate": 6.501352569882777e-05, | |
| "loss": 1.4144, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.1959139493979164, | |
| "grad_norm": 0.3687719404697418, | |
| "learning_rate": 6.519386834986475e-05, | |
| "loss": 1.4079, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.19645514815315926, | |
| "grad_norm": 0.38350847363471985, | |
| "learning_rate": 6.537421100090172e-05, | |
| "loss": 1.4269, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.19699634690840212, | |
| "grad_norm": 0.39060813188552856, | |
| "learning_rate": 6.555455365193868e-05, | |
| "loss": 1.4265, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.19753754566364498, | |
| "grad_norm": 0.36068469285964966, | |
| "learning_rate": 6.573489630297565e-05, | |
| "loss": 1.4325, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.19807874441888784, | |
| "grad_norm": 0.41185086965560913, | |
| "learning_rate": 6.591523895401263e-05, | |
| "loss": 1.4348, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.1986199431741307, | |
| "grad_norm": 0.4441224932670593, | |
| "learning_rate": 6.60955816050496e-05, | |
| "loss": 1.4103, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.19916114192937356, | |
| "grad_norm": 0.3727317452430725, | |
| "learning_rate": 6.627592425608657e-05, | |
| "loss": 1.4188, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.19970234068461643, | |
| "grad_norm": 0.394972562789917, | |
| "learning_rate": 6.645626690712355e-05, | |
| "loss": 1.4095, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.20024353943985929, | |
| "grad_norm": 0.40716880559921265, | |
| "learning_rate": 6.663660955816052e-05, | |
| "loss": 1.4127, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.20078473819510215, | |
| "grad_norm": 0.4156644344329834, | |
| "learning_rate": 6.681695220919748e-05, | |
| "loss": 1.4189, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.201325936950345, | |
| "grad_norm": 0.3787958323955536, | |
| "learning_rate": 6.699729486023445e-05, | |
| "loss": 1.4221, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.20186713570558787, | |
| "grad_norm": 0.42427608370780945, | |
| "learning_rate": 6.717763751127142e-05, | |
| "loss": 1.4192, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.20240833446083073, | |
| "grad_norm": 0.4778277277946472, | |
| "learning_rate": 6.735798016230839e-05, | |
| "loss": 1.4024, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.2029495332160736, | |
| "grad_norm": 0.44801151752471924, | |
| "learning_rate": 6.753832281334535e-05, | |
| "loss": 1.4222, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.20349073197131648, | |
| "grad_norm": 0.46737611293792725, | |
| "learning_rate": 6.771866546438232e-05, | |
| "loss": 1.4117, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.20403193072655934, | |
| "grad_norm": 0.4184872806072235, | |
| "learning_rate": 6.78990081154193e-05, | |
| "loss": 1.4066, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.2045731294818022, | |
| "grad_norm": 0.40458211302757263, | |
| "learning_rate": 6.807935076645627e-05, | |
| "loss": 1.4274, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.20511432823704506, | |
| "grad_norm": 0.43926185369491577, | |
| "learning_rate": 6.825969341749324e-05, | |
| "loss": 1.4231, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.20565552699228792, | |
| "grad_norm": 0.4434867203235626, | |
| "learning_rate": 6.844003606853022e-05, | |
| "loss": 1.4121, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.20619672574753078, | |
| "grad_norm": 0.4500143826007843, | |
| "learning_rate": 6.862037871956719e-05, | |
| "loss": 1.4179, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.20673792450277365, | |
| "grad_norm": 0.45456650853157043, | |
| "learning_rate": 6.880072137060415e-05, | |
| "loss": 1.3912, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.2072791232580165, | |
| "grad_norm": 0.4214187264442444, | |
| "learning_rate": 6.898106402164112e-05, | |
| "loss": 1.3962, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.20782032201325937, | |
| "grad_norm": 0.427682101726532, | |
| "learning_rate": 6.916140667267809e-05, | |
| "loss": 1.4316, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.20836152076850223, | |
| "grad_norm": 0.44491469860076904, | |
| "learning_rate": 6.934174932371507e-05, | |
| "loss": 1.4218, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.2089027195237451, | |
| "grad_norm": 0.42736080288887024, | |
| "learning_rate": 6.952209197475204e-05, | |
| "loss": 1.3931, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.20944391827898795, | |
| "grad_norm": 0.4041571021080017, | |
| "learning_rate": 6.9702434625789e-05, | |
| "loss": 1.4201, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.2099851170342308, | |
| "grad_norm": 0.4250961244106293, | |
| "learning_rate": 6.988277727682597e-05, | |
| "loss": 1.4299, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.21052631578947367, | |
| "grad_norm": 0.4335261881351471, | |
| "learning_rate": 7.006311992786294e-05, | |
| "loss": 1.4125, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.21106751454471653, | |
| "grad_norm": 0.42000851035118103, | |
| "learning_rate": 7.02434625788999e-05, | |
| "loss": 1.3969, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.21160871329995942, | |
| "grad_norm": 0.38111838698387146, | |
| "learning_rate": 7.042380522993687e-05, | |
| "loss": 1.3795, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.21214991205520228, | |
| "grad_norm": 0.38366812467575073, | |
| "learning_rate": 7.060414788097385e-05, | |
| "loss": 1.4041, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.21269111081044514, | |
| "grad_norm": 0.4334602355957031, | |
| "learning_rate": 7.078449053201082e-05, | |
| "loss": 1.415, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.213232309565688, | |
| "grad_norm": 0.40296411514282227, | |
| "learning_rate": 7.096483318304779e-05, | |
| "loss": 1.4052, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.21377350832093087, | |
| "grad_norm": 0.4197232723236084, | |
| "learning_rate": 7.114517583408477e-05, | |
| "loss": 1.4205, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.21431470707617373, | |
| "grad_norm": 0.40287715196609497, | |
| "learning_rate": 7.132551848512174e-05, | |
| "loss": 1.4047, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.2148559058314166, | |
| "grad_norm": 0.37324196100234985, | |
| "learning_rate": 7.15058611361587e-05, | |
| "loss": 1.4398, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.21539710458665945, | |
| "grad_norm": 0.4409985840320587, | |
| "learning_rate": 7.168620378719567e-05, | |
| "loss": 1.3873, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.2159383033419023, | |
| "grad_norm": 0.41441893577575684, | |
| "learning_rate": 7.186654643823264e-05, | |
| "loss": 1.4174, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.21647950209714517, | |
| "grad_norm": 0.4271719455718994, | |
| "learning_rate": 7.204688908926962e-05, | |
| "loss": 1.3987, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.21702070085238803, | |
| "grad_norm": 0.4969992935657501, | |
| "learning_rate": 7.222723174030659e-05, | |
| "loss": 1.4049, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.2175618996076309, | |
| "grad_norm": 0.45711180567741394, | |
| "learning_rate": 7.240757439134356e-05, | |
| "loss": 1.4061, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.21810309836287375, | |
| "grad_norm": 0.4479979872703552, | |
| "learning_rate": 7.258791704238052e-05, | |
| "loss": 1.4049, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.21864429711811662, | |
| "grad_norm": 0.4708006978034973, | |
| "learning_rate": 7.276825969341749e-05, | |
| "loss": 1.3971, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.2191854958733595, | |
| "grad_norm": 0.4387456774711609, | |
| "learning_rate": 7.294860234445446e-05, | |
| "loss": 1.4272, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.21972669462860236, | |
| "grad_norm": 0.5285756587982178, | |
| "learning_rate": 7.312894499549143e-05, | |
| "loss": 1.3902, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.22026789338384523, | |
| "grad_norm": 0.5111876726150513, | |
| "learning_rate": 7.330928764652841e-05, | |
| "loss": 1.4176, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.2208090921390881, | |
| "grad_norm": 0.4643821716308594, | |
| "learning_rate": 7.348963029756538e-05, | |
| "loss": 1.4216, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.22135029089433095, | |
| "grad_norm": 0.5162214040756226, | |
| "learning_rate": 7.366997294860236e-05, | |
| "loss": 1.4025, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.2218914896495738, | |
| "grad_norm": 0.4296860992908478, | |
| "learning_rate": 7.385031559963932e-05, | |
| "loss": 1.3919, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.22243268840481667, | |
| "grad_norm": 0.4449775815010071, | |
| "learning_rate": 7.403065825067629e-05, | |
| "loss": 1.4002, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.22297388716005953, | |
| "grad_norm": 0.39713212847709656, | |
| "learning_rate": 7.421100090171326e-05, | |
| "loss": 1.4012, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.2235150859153024, | |
| "grad_norm": 0.41655346751213074, | |
| "learning_rate": 7.439134355275023e-05, | |
| "loss": 1.4155, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.22405628467054525, | |
| "grad_norm": 0.3751365542411804, | |
| "learning_rate": 7.45716862037872e-05, | |
| "loss": 1.4021, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.2245974834257881, | |
| "grad_norm": 0.41483408212661743, | |
| "learning_rate": 7.475202885482417e-05, | |
| "loss": 1.4207, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.22513868218103097, | |
| "grad_norm": 0.397360235452652, | |
| "learning_rate": 7.493237150586114e-05, | |
| "loss": 1.392, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.22567988093627384, | |
| "grad_norm": 0.3874877691268921, | |
| "learning_rate": 7.511271415689811e-05, | |
| "loss": 1.4143, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.2262210796915167, | |
| "grad_norm": 0.4382254481315613, | |
| "learning_rate": 7.529305680793508e-05, | |
| "loss": 1.4109, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.22676227844675959, | |
| "grad_norm": 0.3728530704975128, | |
| "learning_rate": 7.547339945897204e-05, | |
| "loss": 1.4215, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.22730347720200245, | |
| "grad_norm": 0.41155338287353516, | |
| "learning_rate": 7.565374211000901e-05, | |
| "loss": 1.3963, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.2278446759572453, | |
| "grad_norm": 0.3550320267677307, | |
| "learning_rate": 7.5834084761046e-05, | |
| "loss": 1.3998, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.22838587471248817, | |
| "grad_norm": 0.3858035206794739, | |
| "learning_rate": 7.601442741208296e-05, | |
| "loss": 1.387, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.22892707346773103, | |
| "grad_norm": 0.38636457920074463, | |
| "learning_rate": 7.619477006311994e-05, | |
| "loss": 1.387, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.2294682722229739, | |
| "grad_norm": 0.41915518045425415, | |
| "learning_rate": 7.637511271415691e-05, | |
| "loss": 1.3917, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.23000947097821675, | |
| "grad_norm": 0.35796865820884705, | |
| "learning_rate": 7.655545536519388e-05, | |
| "loss": 1.406, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.2305506697334596, | |
| "grad_norm": 0.35221853852272034, | |
| "learning_rate": 7.673579801623084e-05, | |
| "loss": 1.3892, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.23109186848870247, | |
| "grad_norm": 0.3815077245235443, | |
| "learning_rate": 7.691614066726781e-05, | |
| "loss": 1.3845, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.23163306724394533, | |
| "grad_norm": 0.3554491400718689, | |
| "learning_rate": 7.709648331830478e-05, | |
| "loss": 1.3644, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.2321742659991882, | |
| "grad_norm": 0.3762814998626709, | |
| "learning_rate": 7.727682596934175e-05, | |
| "loss": 1.3976, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.23271546475443106, | |
| "grad_norm": 0.34575173258781433, | |
| "learning_rate": 7.745716862037873e-05, | |
| "loss": 1.3925, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.23325666350967392, | |
| "grad_norm": 0.37864556908607483, | |
| "learning_rate": 7.76375112714157e-05, | |
| "loss": 1.3993, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.23379786226491678, | |
| "grad_norm": 0.34448474645614624, | |
| "learning_rate": 7.781785392245266e-05, | |
| "loss": 1.3855, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.23433906102015967, | |
| "grad_norm": 0.40932390093803406, | |
| "learning_rate": 7.799819657348963e-05, | |
| "loss": 1.395, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.23488025977540253, | |
| "grad_norm": 0.3737650513648987, | |
| "learning_rate": 7.81785392245266e-05, | |
| "loss": 1.3918, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.2354214585306454, | |
| "grad_norm": 0.42988118529319763, | |
| "learning_rate": 7.835888187556357e-05, | |
| "loss": 1.3837, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.23596265728588825, | |
| "grad_norm": 0.3865496814250946, | |
| "learning_rate": 7.853922452660055e-05, | |
| "loss": 1.3976, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.2365038560411311, | |
| "grad_norm": 0.3682670295238495, | |
| "learning_rate": 7.871956717763751e-05, | |
| "loss": 1.3792, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.23704505479637397, | |
| "grad_norm": 0.4236462712287903, | |
| "learning_rate": 7.88999098286745e-05, | |
| "loss": 1.4032, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.23758625355161683, | |
| "grad_norm": 0.3742213249206543, | |
| "learning_rate": 7.908025247971146e-05, | |
| "loss": 1.3709, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.2381274523068597, | |
| "grad_norm": 0.38234424591064453, | |
| "learning_rate": 7.926059513074843e-05, | |
| "loss": 1.3862, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.23866865106210255, | |
| "grad_norm": 0.37414151430130005, | |
| "learning_rate": 7.94409377817854e-05, | |
| "loss": 1.3751, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.23920984981734542, | |
| "grad_norm": 0.3838132619857788, | |
| "learning_rate": 7.962128043282237e-05, | |
| "loss": 1.3805, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.23975104857258828, | |
| "grad_norm": 0.3818622827529907, | |
| "learning_rate": 7.980162308385933e-05, | |
| "loss": 1.3735, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.24029224732783114, | |
| "grad_norm": 0.38791927695274353, | |
| "learning_rate": 7.99819657348963e-05, | |
| "loss": 1.3958, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.240833446083074, | |
| "grad_norm": 0.4164978861808777, | |
| "learning_rate": 8.016230838593328e-05, | |
| "loss": 1.421, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.24137464483831686, | |
| "grad_norm": 0.3721414804458618, | |
| "learning_rate": 8.034265103697025e-05, | |
| "loss": 1.3977, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.24191584359355975, | |
| "grad_norm": 0.37698984146118164, | |
| "learning_rate": 8.052299368800722e-05, | |
| "loss": 1.3854, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.2424570423488026, | |
| "grad_norm": 0.3553116023540497, | |
| "learning_rate": 8.070333633904418e-05, | |
| "loss": 1.3925, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.24299824110404547, | |
| "grad_norm": 0.37809059023857117, | |
| "learning_rate": 8.088367899008115e-05, | |
| "loss": 1.368, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.24353943985928833, | |
| "grad_norm": 0.3835943043231964, | |
| "learning_rate": 8.106402164111813e-05, | |
| "loss": 1.3992, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.2440806386145312, | |
| "grad_norm": 0.4013379216194153, | |
| "learning_rate": 8.12443642921551e-05, | |
| "loss": 1.3912, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.24462183736977405, | |
| "grad_norm": 0.37845560908317566, | |
| "learning_rate": 8.142470694319207e-05, | |
| "loss": 1.3934, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.24516303612501691, | |
| "grad_norm": 0.39762255549430847, | |
| "learning_rate": 8.160504959422905e-05, | |
| "loss": 1.3782, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.24570423488025978, | |
| "grad_norm": 0.36652496457099915, | |
| "learning_rate": 8.178539224526602e-05, | |
| "loss": 1.3787, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.24624543363550264, | |
| "grad_norm": 0.39953047037124634, | |
| "learning_rate": 8.196573489630298e-05, | |
| "loss": 1.3752, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.2467866323907455, | |
| "grad_norm": 0.35875022411346436, | |
| "learning_rate": 8.214607754733995e-05, | |
| "loss": 1.3768, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.24732783114598836, | |
| "grad_norm": 0.3617067337036133, | |
| "learning_rate": 8.232642019837692e-05, | |
| "loss": 1.3859, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.24786902990123122, | |
| "grad_norm": 0.38250839710235596, | |
| "learning_rate": 8.250676284941389e-05, | |
| "loss": 1.3897, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.24841022865647408, | |
| "grad_norm": 0.3404116928577423, | |
| "learning_rate": 8.268710550045085e-05, | |
| "loss": 1.3933, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.24895142741171694, | |
| "grad_norm": 0.3547706604003906, | |
| "learning_rate": 8.286744815148782e-05, | |
| "loss": 1.3787, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.2494926261669598, | |
| "grad_norm": 0.32752275466918945, | |
| "learning_rate": 8.30477908025248e-05, | |
| "loss": 1.3905, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.25003382492220266, | |
| "grad_norm": 0.3413980007171631, | |
| "learning_rate": 8.322813345356177e-05, | |
| "loss": 1.385, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.25057502367744555, | |
| "grad_norm": 0.5574982762336731, | |
| "learning_rate": 8.340847610459874e-05, | |
| "loss": 1.3869, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.2511162224326884, | |
| "grad_norm": 0.41128844022750854, | |
| "learning_rate": 8.358881875563572e-05, | |
| "loss": 1.3583, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.2516574211879313, | |
| "grad_norm": 0.3476073145866394, | |
| "learning_rate": 8.376916140667269e-05, | |
| "loss": 1.3832, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.2521986199431741, | |
| "grad_norm": 0.34838998317718506, | |
| "learning_rate": 8.394950405770965e-05, | |
| "loss": 1.3748, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.252739818698417, | |
| "grad_norm": 0.3552824556827545, | |
| "learning_rate": 8.412984670874662e-05, | |
| "loss": 1.3936, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.25328101745365983, | |
| "grad_norm": 0.34918278455734253, | |
| "learning_rate": 8.43101893597836e-05, | |
| "loss": 1.3733, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.2538222162089027, | |
| "grad_norm": 0.431455135345459, | |
| "learning_rate": 8.449053201082057e-05, | |
| "loss": 1.3924, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.2543634149641456, | |
| "grad_norm": 0.37811046838760376, | |
| "learning_rate": 8.467087466185754e-05, | |
| "loss": 1.3861, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.25490461371938844, | |
| "grad_norm": 0.35659778118133545, | |
| "learning_rate": 8.48512173128945e-05, | |
| "loss": 1.3736, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.25544581247463133, | |
| "grad_norm": 0.4327319264411926, | |
| "learning_rate": 8.503155996393147e-05, | |
| "loss": 1.3883, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.25598701122987416, | |
| "grad_norm": 0.39134231209754944, | |
| "learning_rate": 8.521190261496844e-05, | |
| "loss": 1.3704, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.25652820998511705, | |
| "grad_norm": 0.39573270082473755, | |
| "learning_rate": 8.53922452660054e-05, | |
| "loss": 1.4047, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.2570694087403599, | |
| "grad_norm": 0.3299993872642517, | |
| "learning_rate": 8.557258791704237e-05, | |
| "loss": 1.3778, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.2576106074956028, | |
| "grad_norm": 0.3559456765651703, | |
| "learning_rate": 8.575293056807936e-05, | |
| "loss": 1.3794, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.2581518062508456, | |
| "grad_norm": 0.36347028613090515, | |
| "learning_rate": 8.593327321911632e-05, | |
| "loss": 1.3817, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.2586930050060885, | |
| "grad_norm": 0.39882585406303406, | |
| "learning_rate": 8.611361587015329e-05, | |
| "loss": 1.3565, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.2592342037613313, | |
| "grad_norm": 0.3932117223739624, | |
| "learning_rate": 8.629395852119027e-05, | |
| "loss": 1.396, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.2597754025165742, | |
| "grad_norm": 0.3526294231414795, | |
| "learning_rate": 8.647430117222724e-05, | |
| "loss": 1.3624, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.26031660127181705, | |
| "grad_norm": 0.3804738223552704, | |
| "learning_rate": 8.66546438232642e-05, | |
| "loss": 1.3616, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.26085780002705994, | |
| "grad_norm": 0.36557725071907043, | |
| "learning_rate": 8.683498647430117e-05, | |
| "loss": 1.3997, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.2613989987823028, | |
| "grad_norm": 0.3574380874633789, | |
| "learning_rate": 8.701532912533815e-05, | |
| "loss": 1.3901, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.26194019753754566, | |
| "grad_norm": 0.4025056064128876, | |
| "learning_rate": 8.719567177637512e-05, | |
| "loss": 1.3707, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.26248139629278855, | |
| "grad_norm": 0.3687063157558441, | |
| "learning_rate": 8.737601442741209e-05, | |
| "loss": 1.3679, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.2630225950480314, | |
| "grad_norm": 0.3697878420352936, | |
| "learning_rate": 8.755635707844906e-05, | |
| "loss": 1.3981, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.26356379380327427, | |
| "grad_norm": 0.34241798520088196, | |
| "learning_rate": 8.773669972948602e-05, | |
| "loss": 1.3728, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.2641049925585171, | |
| "grad_norm": 0.40002745389938354, | |
| "learning_rate": 8.791704238052299e-05, | |
| "loss": 1.3732, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.26464619131376, | |
| "grad_norm": 0.42943906784057617, | |
| "learning_rate": 8.809738503155996e-05, | |
| "loss": 1.3731, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.2651873900690028, | |
| "grad_norm": 0.37437063455581665, | |
| "learning_rate": 8.827772768259693e-05, | |
| "loss": 1.372, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.2657285888242457, | |
| "grad_norm": 0.3378891944885254, | |
| "learning_rate": 8.845807033363391e-05, | |
| "loss": 1.3777, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.26626978757948855, | |
| "grad_norm": 0.32884734869003296, | |
| "learning_rate": 8.863841298467088e-05, | |
| "loss": 1.3639, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.26681098633473144, | |
| "grad_norm": 0.3945903480052948, | |
| "learning_rate": 8.881875563570786e-05, | |
| "loss": 1.3722, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.26735218508997427, | |
| "grad_norm": 0.39569205045700073, | |
| "learning_rate": 8.899909828674482e-05, | |
| "loss": 1.376, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.26789338384521716, | |
| "grad_norm": 0.31659135222435, | |
| "learning_rate": 8.917944093778179e-05, | |
| "loss": 1.3807, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.26843458260046, | |
| "grad_norm": 0.44032666087150574, | |
| "learning_rate": 8.935978358881876e-05, | |
| "loss": 1.3986, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.2689757813557029, | |
| "grad_norm": 0.3445993661880493, | |
| "learning_rate": 8.954012623985573e-05, | |
| "loss": 1.3589, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.26951698011094577, | |
| "grad_norm": 0.3693557679653168, | |
| "learning_rate": 8.97204688908927e-05, | |
| "loss": 1.3593, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.2700581788661886, | |
| "grad_norm": 0.3965442478656769, | |
| "learning_rate": 8.990081154192968e-05, | |
| "loss": 1.3909, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.2705993776214315, | |
| "grad_norm": 0.4038390815258026, | |
| "learning_rate": 9.008115419296664e-05, | |
| "loss": 1.3629, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.2711405763766743, | |
| "grad_norm": 0.36394256353378296, | |
| "learning_rate": 9.026149684400361e-05, | |
| "loss": 1.3812, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.2716817751319172, | |
| "grad_norm": 0.4527181386947632, | |
| "learning_rate": 9.044183949504058e-05, | |
| "loss": 1.3692, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.27222297388716005, | |
| "grad_norm": 0.37700143456459045, | |
| "learning_rate": 9.062218214607755e-05, | |
| "loss": 1.3652, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.27276417264240294, | |
| "grad_norm": 0.45016244053840637, | |
| "learning_rate": 9.080252479711451e-05, | |
| "loss": 1.3657, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.27330537139764577, | |
| "grad_norm": 0.42159709334373474, | |
| "learning_rate": 9.09828674481515e-05, | |
| "loss": 1.3702, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.27384657015288866, | |
| "grad_norm": 0.3884572982788086, | |
| "learning_rate": 9.116321009918846e-05, | |
| "loss": 1.3535, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.2743877689081315, | |
| "grad_norm": 0.37507420778274536, | |
| "learning_rate": 9.134355275022544e-05, | |
| "loss": 1.3659, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.2749289676633744, | |
| "grad_norm": 0.35269656777381897, | |
| "learning_rate": 9.152389540126241e-05, | |
| "loss": 1.3623, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.2754701664186172, | |
| "grad_norm": 0.3543412387371063, | |
| "learning_rate": 9.170423805229938e-05, | |
| "loss": 1.3695, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.2760113651738601, | |
| "grad_norm": 0.3173674941062927, | |
| "learning_rate": 9.188458070333635e-05, | |
| "loss": 1.3572, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.276552563929103, | |
| "grad_norm": 0.3729746341705322, | |
| "learning_rate": 9.206492335437331e-05, | |
| "loss": 1.3888, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.2770937626843458, | |
| "grad_norm": 0.33210429549217224, | |
| "learning_rate": 9.224526600541028e-05, | |
| "loss": 1.3395, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.2776349614395887, | |
| "grad_norm": 0.338366836309433, | |
| "learning_rate": 9.242560865644725e-05, | |
| "loss": 1.3498, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.27817616019483155, | |
| "grad_norm": 0.3367864191532135, | |
| "learning_rate": 9.260595130748423e-05, | |
| "loss": 1.3548, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.27871735895007443, | |
| "grad_norm": 0.40313002467155457, | |
| "learning_rate": 9.27862939585212e-05, | |
| "loss": 1.4059, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.27925855770531727, | |
| "grad_norm": 0.3434394299983978, | |
| "learning_rate": 9.296663660955816e-05, | |
| "loss": 1.3522, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.27979975646056016, | |
| "grad_norm": 0.35454580187797546, | |
| "learning_rate": 9.314697926059513e-05, | |
| "loss": 1.3838, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.280340955215803, | |
| "grad_norm": 0.3280038833618164, | |
| "learning_rate": 9.33273219116321e-05, | |
| "loss": 1.3753, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.2808821539710459, | |
| "grad_norm": 0.4306875169277191, | |
| "learning_rate": 9.350766456266907e-05, | |
| "loss": 1.3807, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.2814233527262887, | |
| "grad_norm": 0.3500923812389374, | |
| "learning_rate": 9.368800721370605e-05, | |
| "loss": 1.36, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.2819645514815316, | |
| "grad_norm": 0.3702130913734436, | |
| "learning_rate": 9.386834986474301e-05, | |
| "loss": 1.3919, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.28250575023677443, | |
| "grad_norm": 0.3651416599750519, | |
| "learning_rate": 9.404869251578e-05, | |
| "loss": 1.3805, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.2830469489920173, | |
| "grad_norm": 0.35927796363830566, | |
| "learning_rate": 9.422903516681696e-05, | |
| "loss": 1.3507, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.28358814774726016, | |
| "grad_norm": 0.36750975251197815, | |
| "learning_rate": 9.440937781785393e-05, | |
| "loss": 1.3475, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.28412934650250304, | |
| "grad_norm": 0.31946998834609985, | |
| "learning_rate": 9.45897204688909e-05, | |
| "loss": 1.3708, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.28467054525774593, | |
| "grad_norm": 0.3447932302951813, | |
| "learning_rate": 9.477006311992787e-05, | |
| "loss": 1.3519, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.28521174401298877, | |
| "grad_norm": 0.31405511498451233, | |
| "learning_rate": 9.495040577096483e-05, | |
| "loss": 1.3806, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.28575294276823165, | |
| "grad_norm": 0.3198442757129669, | |
| "learning_rate": 9.51307484220018e-05, | |
| "loss": 1.368, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.2862941415234745, | |
| "grad_norm": 0.33328956365585327, | |
| "learning_rate": 9.531109107303878e-05, | |
| "loss": 1.3429, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.2868353402787174, | |
| "grad_norm": 0.29432907700538635, | |
| "learning_rate": 9.549143372407575e-05, | |
| "loss": 1.3698, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.2873765390339602, | |
| "grad_norm": 0.3468937575817108, | |
| "learning_rate": 9.567177637511272e-05, | |
| "loss": 1.356, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.2879177377892031, | |
| "grad_norm": 0.3619658350944519, | |
| "learning_rate": 9.585211902614968e-05, | |
| "loss": 1.3596, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.28845893654444593, | |
| "grad_norm": 0.3384917378425598, | |
| "learning_rate": 9.603246167718665e-05, | |
| "loss": 1.3693, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.2890001352996888, | |
| "grad_norm": 0.3724029064178467, | |
| "learning_rate": 9.621280432822363e-05, | |
| "loss": 1.3639, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.28954133405493165, | |
| "grad_norm": 0.7029115557670593, | |
| "learning_rate": 9.63931469792606e-05, | |
| "loss": 1.3557, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.29008253281017454, | |
| "grad_norm": 0.5529230833053589, | |
| "learning_rate": 9.657348963029757e-05, | |
| "loss": 1.3657, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.2906237315654174, | |
| "grad_norm": 0.4254820644855499, | |
| "learning_rate": 9.675383228133455e-05, | |
| "loss": 1.3633, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.29116493032066026, | |
| "grad_norm": 0.4930615723133087, | |
| "learning_rate": 9.693417493237152e-05, | |
| "loss": 1.3714, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.2917061290759031, | |
| "grad_norm": 0.4455857574939728, | |
| "learning_rate": 9.711451758340848e-05, | |
| "loss": 1.3615, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.292247327831146, | |
| "grad_norm": 0.4171796441078186, | |
| "learning_rate": 9.729486023444545e-05, | |
| "loss": 1.3673, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.2927885265863889, | |
| "grad_norm": 0.37810683250427246, | |
| "learning_rate": 9.747520288548242e-05, | |
| "loss": 1.3683, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.2933297253416317, | |
| "grad_norm": 0.4057900905609131, | |
| "learning_rate": 9.765554553651939e-05, | |
| "loss": 1.3674, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.2938709240968746, | |
| "grad_norm": 0.40583640336990356, | |
| "learning_rate": 9.783588818755635e-05, | |
| "loss": 1.3566, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.29441212285211743, | |
| "grad_norm": 0.39454150199890137, | |
| "learning_rate": 9.801623083859334e-05, | |
| "loss": 1.3611, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.2949533216073603, | |
| "grad_norm": 0.42229679226875305, | |
| "learning_rate": 9.81965734896303e-05, | |
| "loss": 1.3726, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.29549452036260315, | |
| "grad_norm": 0.3274170160293579, | |
| "learning_rate": 9.837691614066727e-05, | |
| "loss": 1.3375, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.29603571911784604, | |
| "grad_norm": 0.40999388694763184, | |
| "learning_rate": 9.855725879170424e-05, | |
| "loss": 1.3548, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.2965769178730889, | |
| "grad_norm": 0.33515796065330505, | |
| "learning_rate": 9.873760144274122e-05, | |
| "loss": 1.3903, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.29711811662833176, | |
| "grad_norm": 0.3834095597267151, | |
| "learning_rate": 9.891794409377819e-05, | |
| "loss": 1.3653, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.2976593153835746, | |
| "grad_norm": 0.34850651025772095, | |
| "learning_rate": 9.909828674481515e-05, | |
| "loss": 1.3573, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.2982005141388175, | |
| "grad_norm": 0.3811749815940857, | |
| "learning_rate": 9.927862939585212e-05, | |
| "loss": 1.3843, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.2987417128940603, | |
| "grad_norm": 0.3308597803115845, | |
| "learning_rate": 9.94589720468891e-05, | |
| "loss": 1.3492, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.2992829116493032, | |
| "grad_norm": 0.31952470541000366, | |
| "learning_rate": 9.963931469792607e-05, | |
| "loss": 1.3586, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.2998241104045461, | |
| "grad_norm": 0.3433592915534973, | |
| "learning_rate": 9.981965734896304e-05, | |
| "loss": 1.3524, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.30036530915978893, | |
| "grad_norm": 0.4547680914402008, | |
| "learning_rate": 0.0001, | |
| "loss": 1.3562, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.3009065079150318, | |
| "grad_norm": 0.4963592290878296, | |
| "learning_rate": 9.999999008881264e-05, | |
| "loss": 1.3452, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.30144770667027465, | |
| "grad_norm": 1.1111193895339966, | |
| "learning_rate": 9.999996035525452e-05, | |
| "loss": 1.3732, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.30198890542551754, | |
| "grad_norm": 0.6860964298248291, | |
| "learning_rate": 9.999991079933739e-05, | |
| "loss": 1.3689, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.3025301041807604, | |
| "grad_norm": 0.7344204783439636, | |
| "learning_rate": 9.999984142108093e-05, | |
| "loss": 1.3575, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.30307130293600326, | |
| "grad_norm": 0.6534725427627563, | |
| "learning_rate": 9.999975222051263e-05, | |
| "loss": 1.376, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.3036125016912461, | |
| "grad_norm": 0.5108229517936707, | |
| "learning_rate": 9.999964319766785e-05, | |
| "loss": 1.3741, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.304153700446489, | |
| "grad_norm": 0.4888688325881958, | |
| "learning_rate": 9.99995143525898e-05, | |
| "loss": 1.3555, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.3046948992017318, | |
| "grad_norm": 0.42808806896209717, | |
| "learning_rate": 9.999936568532962e-05, | |
| "loss": 1.3548, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.3052360979569747, | |
| "grad_norm": 0.3921727240085602, | |
| "learning_rate": 9.999919719594617e-05, | |
| "loss": 1.3559, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.30577729671221754, | |
| "grad_norm": 0.3473529517650604, | |
| "learning_rate": 9.999900888450628e-05, | |
| "loss": 1.3603, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.3063184954674604, | |
| "grad_norm": 0.3337381184101105, | |
| "learning_rate": 9.999880075108464e-05, | |
| "loss": 1.3642, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.30685969422270326, | |
| "grad_norm": 0.3363231122493744, | |
| "learning_rate": 9.99985727957637e-05, | |
| "loss": 1.3606, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.30740089297794615, | |
| "grad_norm": 0.32726484537124634, | |
| "learning_rate": 9.999832501863386e-05, | |
| "loss": 1.3493, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.30794209173318904, | |
| "grad_norm": 0.3190646767616272, | |
| "learning_rate": 9.999805741979338e-05, | |
| "loss": 1.3518, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.30848329048843187, | |
| "grad_norm": 0.31244540214538574, | |
| "learning_rate": 9.999776999934831e-05, | |
| "loss": 1.3495, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.30902448924367476, | |
| "grad_norm": 0.3286384344100952, | |
| "learning_rate": 9.999746275741261e-05, | |
| "loss": 1.3517, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.3095656879989176, | |
| "grad_norm": 0.3630046546459198, | |
| "learning_rate": 9.99971356941081e-05, | |
| "loss": 1.3641, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.3101068867541605, | |
| "grad_norm": 0.30771151185035706, | |
| "learning_rate": 9.999678880956443e-05, | |
| "loss": 1.3571, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.3106480855094033, | |
| "grad_norm": 0.30026301741600037, | |
| "learning_rate": 9.99964221039191e-05, | |
| "loss": 1.3541, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.3111892842646462, | |
| "grad_norm": 0.3128298223018646, | |
| "learning_rate": 9.999603557731754e-05, | |
| "loss": 1.3556, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.31173048301988904, | |
| "grad_norm": 0.30185452103614807, | |
| "learning_rate": 9.999562922991293e-05, | |
| "loss": 1.3484, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.3122716817751319, | |
| "grad_norm": 0.3274635076522827, | |
| "learning_rate": 9.99952030618664e-05, | |
| "loss": 1.3729, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.31281288053037476, | |
| "grad_norm": 0.30549076199531555, | |
| "learning_rate": 9.999475707334692e-05, | |
| "loss": 1.3642, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.31335407928561765, | |
| "grad_norm": 0.3147718906402588, | |
| "learning_rate": 9.999429126453126e-05, | |
| "loss": 1.3493, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.3138952780408605, | |
| "grad_norm": 0.6205586791038513, | |
| "learning_rate": 9.99938056356041e-05, | |
| "loss": 1.3623, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.31443647679610337, | |
| "grad_norm": 0.3471706211566925, | |
| "learning_rate": 9.999330018675798e-05, | |
| "loss": 1.3533, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.31497767555134626, | |
| "grad_norm": 1.3515815734863281, | |
| "learning_rate": 9.999277491819328e-05, | |
| "loss": 1.3565, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.3155188743065891, | |
| "grad_norm": 733.9155883789062, | |
| "learning_rate": 9.999222983011824e-05, | |
| "loss": 5.2143, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.316060073061832, | |
| "grad_norm": 2.9439170360565186, | |
| "learning_rate": 9.999166492274894e-05, | |
| "loss": 1.4438, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.3166012718170748, | |
| "grad_norm": 1.5871142148971558, | |
| "learning_rate": 9.999108019630938e-05, | |
| "loss": 1.4426, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.3171424705723177, | |
| "grad_norm": 711.9217529296875, | |
| "learning_rate": 9.999047565103132e-05, | |
| "loss": 3.6935, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.31768366932756054, | |
| "grad_norm": 100.76264953613281, | |
| "learning_rate": 9.998985128715448e-05, | |
| "loss": 4.2396, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.3182248680828034, | |
| "grad_norm": 108.88189697265625, | |
| "learning_rate": 9.998920710492634e-05, | |
| "loss": 4.9929, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.31876606683804626, | |
| "grad_norm": 72.18595123291016, | |
| "learning_rate": 9.998854310460233e-05, | |
| "loss": 6.0375, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.31930726559328915, | |
| "grad_norm": 59.48538589477539, | |
| "learning_rate": 9.998785928644567e-05, | |
| "loss": 5.8932, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.319848464348532, | |
| "grad_norm": 36.32703399658203, | |
| "learning_rate": 9.998715565072744e-05, | |
| "loss": 6.5369, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.32038966310377487, | |
| "grad_norm": 18.565351486206055, | |
| "learning_rate": 9.998643219772664e-05, | |
| "loss": 6.1671, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.3209308618590177, | |
| "grad_norm": 45.84898376464844, | |
| "learning_rate": 9.998568892773003e-05, | |
| "loss": 5.9379, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.3214720606142606, | |
| "grad_norm": 66.2480239868164, | |
| "learning_rate": 9.998492584103232e-05, | |
| "loss": 5.7071, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.3220132593695034, | |
| "grad_norm": 41.693092346191406, | |
| "learning_rate": 9.998414293793599e-05, | |
| "loss": 6.3198, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.3225544581247463, | |
| "grad_norm": 19.323413848876953, | |
| "learning_rate": 9.998334021875147e-05, | |
| "loss": 5.377, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.3230956568799892, | |
| "grad_norm": 15.907301902770996, | |
| "learning_rate": 9.998251768379696e-05, | |
| "loss": 4.5293, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.32363685563523203, | |
| "grad_norm": 80.1374740600586, | |
| "learning_rate": 9.998167533339857e-05, | |
| "loss": 4.3471, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.3241780543904749, | |
| "grad_norm": 23.298336029052734, | |
| "learning_rate": 9.998081316789024e-05, | |
| "loss": 3.7461, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.32471925314571776, | |
| "grad_norm": 82.48027801513672, | |
| "learning_rate": 9.997993118761378e-05, | |
| "loss": 4.1647, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.32526045190096065, | |
| "grad_norm": 27.916913986206055, | |
| "learning_rate": 9.997902939291883e-05, | |
| "loss": 3.9092, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.3258016506562035, | |
| "grad_norm": 15.70148754119873, | |
| "learning_rate": 9.997810778416293e-05, | |
| "loss": 3.1628, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.32634284941144637, | |
| "grad_norm": 18.33330535888672, | |
| "learning_rate": 9.997716636171142e-05, | |
| "loss": 2.8777, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.3268840481666892, | |
| "grad_norm": 10.6620512008667, | |
| "learning_rate": 9.997620512593755e-05, | |
| "loss": 2.3009, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.3274252469219321, | |
| "grad_norm": 32.01799011230469, | |
| "learning_rate": 9.99752240772224e-05, | |
| "loss": 1.9617, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.3279664456771749, | |
| "grad_norm": 5.677090644836426, | |
| "learning_rate": 9.997422321595488e-05, | |
| "loss": 1.8401, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.3285076444324178, | |
| "grad_norm": 8.914667129516602, | |
| "learning_rate": 9.997320254253179e-05, | |
| "loss": 1.6707, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.32904884318766064, | |
| "grad_norm": 2.3725008964538574, | |
| "learning_rate": 9.997216205735779e-05, | |
| "loss": 1.5757, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.32959004194290353, | |
| "grad_norm": 2.418389320373535, | |
| "learning_rate": 9.997110176084538e-05, | |
| "loss": 1.5154, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.33013124069814637, | |
| "grad_norm": 2.802185297012329, | |
| "learning_rate": 9.997002165341487e-05, | |
| "loss": 1.4883, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.33067243945338926, | |
| "grad_norm": 2.1769211292266846, | |
| "learning_rate": 9.996892173549452e-05, | |
| "loss": 1.445, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.33121363820863214, | |
| "grad_norm": 1.799670934677124, | |
| "learning_rate": 9.996780200752035e-05, | |
| "loss": 1.4276, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.331754836963875, | |
| "grad_norm": 3.2545313835144043, | |
| "learning_rate": 9.996666246993627e-05, | |
| "loss": 1.4394, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.33229603571911787, | |
| "grad_norm": 1.1922351121902466, | |
| "learning_rate": 9.996550312319408e-05, | |
| "loss": 1.4359, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.3328372344743607, | |
| "grad_norm": 2.6813228130340576, | |
| "learning_rate": 9.996432396775339e-05, | |
| "loss": 1.4229, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.3333784332296036, | |
| "grad_norm": 1.6968843936920166, | |
| "learning_rate": 9.996312500408165e-05, | |
| "loss": 1.4281, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.3339196319848464, | |
| "grad_norm": 1.3502254486083984, | |
| "learning_rate": 9.996190623265421e-05, | |
| "loss": 1.408, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.3344608307400893, | |
| "grad_norm": 1.2809518575668335, | |
| "learning_rate": 9.996066765395424e-05, | |
| "loss": 1.4176, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.33500202949533214, | |
| "grad_norm": 1.0455057621002197, | |
| "learning_rate": 9.995940926847279e-05, | |
| "loss": 1.4056, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.33554322825057503, | |
| "grad_norm": 1.3292824029922485, | |
| "learning_rate": 9.99581310767087e-05, | |
| "loss": 1.4033, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.33608442700581787, | |
| "grad_norm": 1.5960067510604858, | |
| "learning_rate": 9.995683307916875e-05, | |
| "loss": 1.379, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.33662562576106075, | |
| "grad_norm": 1.0471105575561523, | |
| "learning_rate": 9.99555152763675e-05, | |
| "loss": 1.3823, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.3371668245163036, | |
| "grad_norm": 2.339273452758789, | |
| "learning_rate": 9.99541776688274e-05, | |
| "loss": 1.3698, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.3377080232715465, | |
| "grad_norm": 0.81674724817276, | |
| "learning_rate": 9.995282025707875e-05, | |
| "loss": 1.4154, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.33824922202678936, | |
| "grad_norm": 0.6240290999412537, | |
| "learning_rate": 9.995144304165968e-05, | |
| "loss": 1.4035, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.3387904207820322, | |
| "grad_norm": 2.281787872314453, | |
| "learning_rate": 9.995004602311619e-05, | |
| "loss": 1.3906, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.3393316195372751, | |
| "grad_norm": 0.6818395853042603, | |
| "learning_rate": 9.99486292020021e-05, | |
| "loss": 1.3853, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.3398728182925179, | |
| "grad_norm": 6.299881935119629, | |
| "learning_rate": 9.994719257887915e-05, | |
| "loss": 1.3856, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.3404140170477608, | |
| "grad_norm": 0.8173750638961792, | |
| "learning_rate": 9.994573615431686e-05, | |
| "loss": 1.3871, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.34095521580300364, | |
| "grad_norm": 2.155395746231079, | |
| "learning_rate": 9.994425992889262e-05, | |
| "loss": 1.3382, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.34149641455824653, | |
| "grad_norm": 0.5846114754676819, | |
| "learning_rate": 9.99427639031917e-05, | |
| "loss": 1.3978, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.34203761331348936, | |
| "grad_norm": 0.6624069213867188, | |
| "learning_rate": 9.994124807780717e-05, | |
| "loss": 1.3792, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.34257881206873225, | |
| "grad_norm": 0.5708588361740112, | |
| "learning_rate": 9.993971245333998e-05, | |
| "loss": 1.3677, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.3431200108239751, | |
| "grad_norm": 0.5245474576950073, | |
| "learning_rate": 9.993815703039894e-05, | |
| "loss": 1.3672, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.343661209579218, | |
| "grad_norm": 0.501871645450592, | |
| "learning_rate": 9.993658180960069e-05, | |
| "loss": 1.3674, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.3442024083344608, | |
| "grad_norm": 0.5990382432937622, | |
| "learning_rate": 9.993498679156969e-05, | |
| "loss": 1.3804, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.3447436070897037, | |
| "grad_norm": 0.42392146587371826, | |
| "learning_rate": 9.993337197693833e-05, | |
| "loss": 1.3628, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.34528480584494653, | |
| "grad_norm": 0.46936917304992676, | |
| "learning_rate": 9.993173736634676e-05, | |
| "loss": 1.3696, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.3458260046001894, | |
| "grad_norm": 0.52222740650177, | |
| "learning_rate": 9.993008296044304e-05, | |
| "loss": 1.3697, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.3463672033554323, | |
| "grad_norm": 0.3582518398761749, | |
| "learning_rate": 9.992840875988305e-05, | |
| "loss": 1.3825, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.34690840211067514, | |
| "grad_norm": 0.3533988296985626, | |
| "learning_rate": 9.99267147653305e-05, | |
| "loss": 1.361, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.34744960086591803, | |
| "grad_norm": 0.35905274748802185, | |
| "learning_rate": 9.992500097745702e-05, | |
| "loss": 1.3721, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.34799079962116086, | |
| "grad_norm": 0.3057416081428528, | |
| "learning_rate": 9.9923267396942e-05, | |
| "loss": 1.369, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 0.34853199837640375, | |
| "grad_norm": 0.3299311101436615, | |
| "learning_rate": 9.992151402447272e-05, | |
| "loss": 1.358, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.3490731971316466, | |
| "grad_norm": 0.3086453080177307, | |
| "learning_rate": 9.99197408607443e-05, | |
| "loss": 1.3534, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.3496143958868895, | |
| "grad_norm": 0.3111782968044281, | |
| "learning_rate": 9.991794790645969e-05, | |
| "loss": 1.3605, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.3501555946421323, | |
| "grad_norm": 0.3231568932533264, | |
| "learning_rate": 9.991613516232974e-05, | |
| "loss": 1.3543, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 0.3506967933973752, | |
| "grad_norm": 0.3288814425468445, | |
| "learning_rate": 9.991430262907309e-05, | |
| "loss": 1.3521, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.35123799215261803, | |
| "grad_norm": 0.3239436745643616, | |
| "learning_rate": 9.991245030741622e-05, | |
| "loss": 1.3335, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 0.3517791909078609, | |
| "grad_norm": 0.3560773730278015, | |
| "learning_rate": 9.991057819809353e-05, | |
| "loss": 1.3487, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.35232038966310375, | |
| "grad_norm": 0.4387347400188446, | |
| "learning_rate": 9.990868630184716e-05, | |
| "loss": 1.3548, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.35286158841834664, | |
| "grad_norm": 0.32067278027534485, | |
| "learning_rate": 9.990677461942717e-05, | |
| "loss": 1.3471, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.3534027871735895, | |
| "grad_norm": 0.4399580955505371, | |
| "learning_rate": 9.990484315159146e-05, | |
| "loss": 1.3588, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.35394398592883236, | |
| "grad_norm": 0.9175602793693542, | |
| "learning_rate": 9.990289189910571e-05, | |
| "loss": 1.3432, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.35448518468407525, | |
| "grad_norm": 0.45273318886756897, | |
| "learning_rate": 9.990092086274352e-05, | |
| "loss": 1.3434, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.3550263834393181, | |
| "grad_norm": 0.3346487879753113, | |
| "learning_rate": 9.989893004328632e-05, | |
| "loss": 1.3339, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.35556758219456097, | |
| "grad_norm": 0.4779951870441437, | |
| "learning_rate": 9.989691944152333e-05, | |
| "loss": 1.3561, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 0.3561087809498038, | |
| "grad_norm": 0.6359366774559021, | |
| "learning_rate": 9.989488905825166e-05, | |
| "loss": 1.3499, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.3566499797050467, | |
| "grad_norm": 0.5867050290107727, | |
| "learning_rate": 9.989283889427625e-05, | |
| "loss": 1.3791, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 0.3571911784602895, | |
| "grad_norm": 1.869691014289856, | |
| "learning_rate": 9.989076895040989e-05, | |
| "loss": 1.3663, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.3577323772155324, | |
| "grad_norm": 2.7147843837738037, | |
| "learning_rate": 9.98886792274732e-05, | |
| "loss": 1.358, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 0.35827357597077525, | |
| "grad_norm": 0.8717885613441467, | |
| "learning_rate": 9.988656972629465e-05, | |
| "loss": 1.34, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 0.35881477472601814, | |
| "grad_norm": 0.7126337885856628, | |
| "learning_rate": 9.988444044771054e-05, | |
| "loss": 1.3281, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 0.35935597348126097, | |
| "grad_norm": 0.7409217357635498, | |
| "learning_rate": 9.988229139256502e-05, | |
| "loss": 1.3571, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.35989717223650386, | |
| "grad_norm": 0.5892549157142639, | |
| "learning_rate": 9.988012256171006e-05, | |
| "loss": 1.3269, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.3604383709917467, | |
| "grad_norm": 0.4858717620372772, | |
| "learning_rate": 9.98779339560055e-05, | |
| "loss": 1.3506, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 0.3609795697469896, | |
| "grad_norm": 0.37409740686416626, | |
| "learning_rate": 9.987572557631903e-05, | |
| "loss": 1.3339, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 0.36152076850223247, | |
| "grad_norm": 0.38315168023109436, | |
| "learning_rate": 9.987349742352611e-05, | |
| "loss": 1.3404, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 0.3620619672574753, | |
| "grad_norm": 0.32702726125717163, | |
| "learning_rate": 9.987124949851014e-05, | |
| "loss": 1.3595, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 0.3626031660127182, | |
| "grad_norm": 0.3133656680583954, | |
| "learning_rate": 9.986898180216226e-05, | |
| "loss": 1.3428, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.363144364767961, | |
| "grad_norm": 0.2916230857372284, | |
| "learning_rate": 9.986669433538152e-05, | |
| "loss": 1.3381, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 0.3636855635232039, | |
| "grad_norm": 0.28036215901374817, | |
| "learning_rate": 9.986438709907476e-05, | |
| "loss": 1.3447, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.36422676227844675, | |
| "grad_norm": 0.30352699756622314, | |
| "learning_rate": 9.98620600941567e-05, | |
| "loss": 1.3427, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 0.36476796103368964, | |
| "grad_norm": 0.3100769519805908, | |
| "learning_rate": 9.985971332154984e-05, | |
| "loss": 1.3603, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 0.36530915978893247, | |
| "grad_norm": 0.2933647930622101, | |
| "learning_rate": 9.98573467821846e-05, | |
| "loss": 1.3646, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.36585035854417536, | |
| "grad_norm": 0.2938663959503174, | |
| "learning_rate": 9.985496047699916e-05, | |
| "loss": 1.3763, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 0.3663915572994182, | |
| "grad_norm": 0.2916519343852997, | |
| "learning_rate": 9.985255440693955e-05, | |
| "loss": 1.3431, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 0.3669327560546611, | |
| "grad_norm": 0.2954147756099701, | |
| "learning_rate": 9.985012857295968e-05, | |
| "loss": 1.338, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 0.3674739548099039, | |
| "grad_norm": 0.2839341163635254, | |
| "learning_rate": 9.984768297602125e-05, | |
| "loss": 1.3653, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 0.3680151535651468, | |
| "grad_norm": 0.2878473699092865, | |
| "learning_rate": 9.984521761709382e-05, | |
| "loss": 1.3302, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.3685563523203897, | |
| "grad_norm": 0.2859325408935547, | |
| "learning_rate": 9.984273249715478e-05, | |
| "loss": 1.3273, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 0.3690975510756325, | |
| "grad_norm": 0.28399959206581116, | |
| "learning_rate": 9.984022761718933e-05, | |
| "loss": 1.3516, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 0.3696387498308754, | |
| "grad_norm": 0.29740169644355774, | |
| "learning_rate": 9.983770297819052e-05, | |
| "loss": 1.3389, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 0.37017994858611825, | |
| "grad_norm": 0.3143361806869507, | |
| "learning_rate": 9.983515858115928e-05, | |
| "loss": 1.3557, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 0.37072114734136113, | |
| "grad_norm": 0.30783936381340027, | |
| "learning_rate": 9.983259442710429e-05, | |
| "loss": 1.3498, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.37126234609660397, | |
| "grad_norm": 0.297091543674469, | |
| "learning_rate": 9.983001051704211e-05, | |
| "loss": 1.3308, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 0.37180354485184686, | |
| "grad_norm": 0.3118893504142761, | |
| "learning_rate": 9.982740685199712e-05, | |
| "loss": 1.3372, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 0.3723447436070897, | |
| "grad_norm": 0.2826865017414093, | |
| "learning_rate": 9.982478343300155e-05, | |
| "loss": 1.3488, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.3728859423623326, | |
| "grad_norm": 0.2829175889492035, | |
| "learning_rate": 9.982214026109544e-05, | |
| "loss": 1.3693, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 0.3734271411175754, | |
| "grad_norm": 0.3026389479637146, | |
| "learning_rate": 9.981947733732668e-05, | |
| "loss": 1.3276, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.3739683398728183, | |
| "grad_norm": 0.30112889409065247, | |
| "learning_rate": 9.981679466275096e-05, | |
| "loss": 1.3441, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 0.37450953862806113, | |
| "grad_norm": 0.27241262793540955, | |
| "learning_rate": 9.981409223843183e-05, | |
| "loss": 1.3373, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 0.375050737383304, | |
| "grad_norm": 0.2804114520549774, | |
| "learning_rate": 9.981137006544066e-05, | |
| "loss": 1.344, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 0.37559193613854686, | |
| "grad_norm": 0.27698764204978943, | |
| "learning_rate": 9.980862814485665e-05, | |
| "loss": 1.3543, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 0.37613313489378974, | |
| "grad_norm": 0.29283177852630615, | |
| "learning_rate": 9.980586647776681e-05, | |
| "loss": 1.3332, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.37667433364903263, | |
| "grad_norm": 0.2896028459072113, | |
| "learning_rate": 9.980308506526604e-05, | |
| "loss": 1.3392, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 0.37721553240427547, | |
| "grad_norm": 0.27882838249206543, | |
| "learning_rate": 9.980028390845697e-05, | |
| "loss": 1.336, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 0.37775673115951836, | |
| "grad_norm": 0.2886262834072113, | |
| "learning_rate": 9.979746300845015e-05, | |
| "loss": 1.3331, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 0.3782979299147612, | |
| "grad_norm": 0.3085189163684845, | |
| "learning_rate": 9.97946223663639e-05, | |
| "loss": 1.3296, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 0.3788391286700041, | |
| "grad_norm": 0.3342386484146118, | |
| "learning_rate": 9.97917619833244e-05, | |
| "loss": 1.351, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.3793803274252469, | |
| "grad_norm": 0.3263756036758423, | |
| "learning_rate": 9.978888186046562e-05, | |
| "loss": 1.3526, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 0.3799215261804898, | |
| "grad_norm": 0.292346715927124, | |
| "learning_rate": 9.97859819989294e-05, | |
| "loss": 1.3498, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 0.38046272493573263, | |
| "grad_norm": 0.29072263836860657, | |
| "learning_rate": 9.978306239986536e-05, | |
| "loss": 1.3423, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 0.3810039236909755, | |
| "grad_norm": 0.3350834548473358, | |
| "learning_rate": 9.978012306443101e-05, | |
| "loss": 1.3559, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.38154512244621835, | |
| "grad_norm": 0.28721559047698975, | |
| "learning_rate": 9.977716399379157e-05, | |
| "loss": 1.3294, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.38208632120146124, | |
| "grad_norm": 0.3062276244163513, | |
| "learning_rate": 9.977418518912023e-05, | |
| "loss": 1.3457, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 0.3826275199567041, | |
| "grad_norm": 0.30255332589149475, | |
| "learning_rate": 9.977118665159791e-05, | |
| "loss": 1.3371, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 0.38316871871194697, | |
| "grad_norm": 0.2800199091434479, | |
| "learning_rate": 9.976816838241334e-05, | |
| "loss": 1.3439, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 0.3837099174671898, | |
| "grad_norm": 0.2754746675491333, | |
| "learning_rate": 9.976513038276312e-05, | |
| "loss": 1.3303, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 0.3842511162224327, | |
| "grad_norm": 0.29933616518974304, | |
| "learning_rate": 9.976207265385168e-05, | |
| "loss": 1.3365, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.3847923149776756, | |
| "grad_norm": 0.3023386001586914, | |
| "learning_rate": 9.975899519689122e-05, | |
| "loss": 1.3164, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 0.3853335137329184, | |
| "grad_norm": 0.2901383936405182, | |
| "learning_rate": 9.975589801310181e-05, | |
| "loss": 1.3209, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 0.3858747124881613, | |
| "grad_norm": 0.28566035628318787, | |
| "learning_rate": 9.975278110371131e-05, | |
| "loss": 1.3301, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 0.38641591124340413, | |
| "grad_norm": 0.3010505735874176, | |
| "learning_rate": 9.974964446995543e-05, | |
| "loss": 1.319, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 0.386957109998647, | |
| "grad_norm": 0.2977135479450226, | |
| "learning_rate": 9.974648811307766e-05, | |
| "loss": 1.3311, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.38749830875388985, | |
| "grad_norm": 0.28914034366607666, | |
| "learning_rate": 9.974331203432932e-05, | |
| "loss": 1.343, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 0.38803950750913274, | |
| "grad_norm": 0.2842980623245239, | |
| "learning_rate": 9.974011623496958e-05, | |
| "loss": 1.3162, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 0.3885807062643756, | |
| "grad_norm": 0.3048929274082184, | |
| "learning_rate": 9.97369007162654e-05, | |
| "loss": 1.3166, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 0.38912190501961846, | |
| "grad_norm": 0.3024531304836273, | |
| "learning_rate": 9.973366547949157e-05, | |
| "loss": 1.3156, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 0.3896631037748613, | |
| "grad_norm": 0.2911103367805481, | |
| "learning_rate": 9.973041052593068e-05, | |
| "loss": 1.3314, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.3902043025301042, | |
| "grad_norm": 0.30932334065437317, | |
| "learning_rate": 9.972713585687317e-05, | |
| "loss": 1.3144, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 0.390745501285347, | |
| "grad_norm": 0.302971750497818, | |
| "learning_rate": 9.972384147361725e-05, | |
| "loss": 1.3431, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 0.3912867000405899, | |
| "grad_norm": 0.32412296533584595, | |
| "learning_rate": 9.972052737746898e-05, | |
| "loss": 1.3167, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 0.3918278987958328, | |
| "grad_norm": 0.4637945890426636, | |
| "learning_rate": 9.97171935697422e-05, | |
| "loss": 1.3433, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 0.39236909755107563, | |
| "grad_norm": 0.32690081000328064, | |
| "learning_rate": 9.971384005175864e-05, | |
| "loss": 1.3327, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.3929102963063185, | |
| "grad_norm": 0.3049994111061096, | |
| "learning_rate": 9.971046682484776e-05, | |
| "loss": 1.3401, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 0.39345149506156135, | |
| "grad_norm": 0.306095689535141, | |
| "learning_rate": 9.970707389034688e-05, | |
| "loss": 1.3205, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 0.39399269381680424, | |
| "grad_norm": 0.3375592529773712, | |
| "learning_rate": 9.970366124960111e-05, | |
| "loss": 1.3243, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 0.3945338925720471, | |
| "grad_norm": 0.30508387088775635, | |
| "learning_rate": 9.970022890396338e-05, | |
| "loss": 1.3342, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 0.39507509132728996, | |
| "grad_norm": 0.2996918261051178, | |
| "learning_rate": 9.969677685479444e-05, | |
| "loss": 1.3457, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.3956162900825328, | |
| "grad_norm": 0.29500269889831543, | |
| "learning_rate": 9.969330510346286e-05, | |
| "loss": 1.3306, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 0.3961574888377757, | |
| "grad_norm": 0.28392598032951355, | |
| "learning_rate": 9.9689813651345e-05, | |
| "loss": 1.3347, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 0.3966986875930185, | |
| "grad_norm": 0.2859434485435486, | |
| "learning_rate": 9.968630249982503e-05, | |
| "loss": 1.3342, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 0.3972398863482614, | |
| "grad_norm": 0.3038876950740814, | |
| "learning_rate": 9.968277165029494e-05, | |
| "loss": 1.3248, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 0.39778108510350424, | |
| "grad_norm": 0.3060581088066101, | |
| "learning_rate": 9.967922110415454e-05, | |
| "loss": 1.3403, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.39832228385874713, | |
| "grad_norm": 0.30475133657455444, | |
| "learning_rate": 9.96756508628114e-05, | |
| "loss": 1.3338, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 0.39886348261398996, | |
| "grad_norm": 0.33263343572616577, | |
| "learning_rate": 9.967206092768095e-05, | |
| "loss": 1.3209, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 0.39940468136923285, | |
| "grad_norm": 0.2895435094833374, | |
| "learning_rate": 9.966845130018645e-05, | |
| "loss": 1.3352, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 0.39994588012447574, | |
| "grad_norm": 0.27237775921821594, | |
| "learning_rate": 9.966482198175886e-05, | |
| "loss": 1.3239, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 0.40048707887971857, | |
| "grad_norm": 0.2740168571472168, | |
| "learning_rate": 9.966117297383707e-05, | |
| "loss": 1.3371, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.40102827763496146, | |
| "grad_norm": 0.30601269006729126, | |
| "learning_rate": 9.965750427786768e-05, | |
| "loss": 1.343, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 0.4015694763902043, | |
| "grad_norm": 0.28768840432167053, | |
| "learning_rate": 9.965381589530518e-05, | |
| "loss": 1.3442, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 0.4021106751454472, | |
| "grad_norm": 0.28244882822036743, | |
| "learning_rate": 9.965010782761177e-05, | |
| "loss": 1.3336, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 0.40265187390069, | |
| "grad_norm": 0.2694818079471588, | |
| "learning_rate": 9.964638007625754e-05, | |
| "loss": 1.3448, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 0.4031930726559329, | |
| "grad_norm": 0.29507288336753845, | |
| "learning_rate": 9.964263264272033e-05, | |
| "loss": 1.327, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.40373427141117574, | |
| "grad_norm": 0.3036315143108368, | |
| "learning_rate": 9.963886552848581e-05, | |
| "loss": 1.3289, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 0.4042754701664186, | |
| "grad_norm": 0.2737107574939728, | |
| "learning_rate": 9.963507873504744e-05, | |
| "loss": 1.3281, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 0.40481666892166146, | |
| "grad_norm": 0.29833105206489563, | |
| "learning_rate": 9.963127226390647e-05, | |
| "loss": 1.3378, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 0.40535786767690435, | |
| "grad_norm": 0.32203689217567444, | |
| "learning_rate": 9.9627446116572e-05, | |
| "loss": 1.3158, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 0.4058990664321472, | |
| "grad_norm": 0.27837038040161133, | |
| "learning_rate": 9.962360029456086e-05, | |
| "loss": 1.3051, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.40644026518739007, | |
| "grad_norm": 0.2688932418823242, | |
| "learning_rate": 9.961973479939774e-05, | |
| "loss": 1.339, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 0.40698146394263296, | |
| "grad_norm": 0.2779388725757599, | |
| "learning_rate": 9.96158496326151e-05, | |
| "loss": 1.3264, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 0.4075226626978758, | |
| "grad_norm": 0.27401190996170044, | |
| "learning_rate": 9.961194479575321e-05, | |
| "loss": 1.3139, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 0.4080638614531187, | |
| "grad_norm": 0.270448237657547, | |
| "learning_rate": 9.960802029036012e-05, | |
| "loss": 1.3253, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 0.4086050602083615, | |
| "grad_norm": 0.29150158166885376, | |
| "learning_rate": 9.96040761179917e-05, | |
| "loss": 1.3324, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.4091462589636044, | |
| "grad_norm": 0.2666511833667755, | |
| "learning_rate": 9.960011228021159e-05, | |
| "loss": 1.325, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 0.40968745771884724, | |
| "grad_norm": 0.2782241106033325, | |
| "learning_rate": 9.959612877859125e-05, | |
| "loss": 1.3162, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 0.4102286564740901, | |
| "grad_norm": 0.2845720946788788, | |
| "learning_rate": 9.959212561470996e-05, | |
| "loss": 1.3316, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 0.41076985522933296, | |
| "grad_norm": 0.27991780638694763, | |
| "learning_rate": 9.958810279015473e-05, | |
| "loss": 1.3121, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 0.41131105398457585, | |
| "grad_norm": 0.2804965674877167, | |
| "learning_rate": 9.958406030652043e-05, | |
| "loss": 1.3246, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.4118522527398187, | |
| "grad_norm": 0.2732795178890228, | |
| "learning_rate": 9.957999816540965e-05, | |
| "loss": 1.3217, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 0.41239345149506157, | |
| "grad_norm": 0.28181079030036926, | |
| "learning_rate": 9.957591636843284e-05, | |
| "loss": 1.3374, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 0.4129346502503044, | |
| "grad_norm": 0.3096240162849426, | |
| "learning_rate": 9.957181491720822e-05, | |
| "loss": 1.3324, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 0.4134758490055473, | |
| "grad_norm": 0.2709742486476898, | |
| "learning_rate": 9.95676938133618e-05, | |
| "loss": 1.3055, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 0.4140170477607901, | |
| "grad_norm": 0.27309080958366394, | |
| "learning_rate": 9.956355305852736e-05, | |
| "loss": 1.313, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.414558246516033, | |
| "grad_norm": 0.29801151156425476, | |
| "learning_rate": 9.955939265434652e-05, | |
| "loss": 1.3185, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 0.4150994452712759, | |
| "grad_norm": 0.28698021173477173, | |
| "learning_rate": 9.955521260246865e-05, | |
| "loss": 1.3214, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 0.41564064402651874, | |
| "grad_norm": 0.2641914188861847, | |
| "learning_rate": 9.955101290455093e-05, | |
| "loss": 1.317, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 0.4161818427817616, | |
| "grad_norm": 0.26065558195114136, | |
| "learning_rate": 9.954679356225832e-05, | |
| "loss": 1.3253, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 0.41672304153700446, | |
| "grad_norm": 0.27157294750213623, | |
| "learning_rate": 9.954255457726354e-05, | |
| "loss": 1.3218, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.41726424029224735, | |
| "grad_norm": 0.2833496630191803, | |
| "learning_rate": 9.953829595124715e-05, | |
| "loss": 1.32, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 0.4178054390474902, | |
| "grad_norm": 0.2757824659347534, | |
| "learning_rate": 9.953401768589745e-05, | |
| "loss": 1.3165, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 0.41834663780273307, | |
| "grad_norm": 0.2609362304210663, | |
| "learning_rate": 9.952971978291059e-05, | |
| "loss": 1.3229, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 0.4188878365579759, | |
| "grad_norm": 0.2863214313983917, | |
| "learning_rate": 9.952540224399043e-05, | |
| "loss": 1.3217, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 0.4194290353132188, | |
| "grad_norm": 0.27573657035827637, | |
| "learning_rate": 9.952106507084864e-05, | |
| "loss": 1.3151, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.4199702340684616, | |
| "grad_norm": 0.26843398809432983, | |
| "learning_rate": 9.95167082652047e-05, | |
| "loss": 1.3185, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 0.4205114328237045, | |
| "grad_norm": 0.25903749465942383, | |
| "learning_rate": 9.951233182878585e-05, | |
| "loss": 1.3142, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 0.42105263157894735, | |
| "grad_norm": 0.27221450209617615, | |
| "learning_rate": 9.950793576332713e-05, | |
| "loss": 1.3119, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 0.42159383033419023, | |
| "grad_norm": 0.2897038161754608, | |
| "learning_rate": 9.950352007057134e-05, | |
| "loss": 1.3217, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 0.42213502908943307, | |
| "grad_norm": 0.2515231668949127, | |
| "learning_rate": 9.949908475226905e-05, | |
| "loss": 1.3263, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.42267622784467596, | |
| "grad_norm": 0.26686710119247437, | |
| "learning_rate": 9.949462981017865e-05, | |
| "loss": 1.3269, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 0.42321742659991884, | |
| "grad_norm": 0.2747204899787903, | |
| "learning_rate": 9.949015524606629e-05, | |
| "loss": 1.3208, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 0.4237586253551617, | |
| "grad_norm": 0.25866395235061646, | |
| "learning_rate": 9.948566106170589e-05, | |
| "loss": 1.3273, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 0.42429982411040457, | |
| "grad_norm": 0.2659189999103546, | |
| "learning_rate": 9.948114725887918e-05, | |
| "loss": 1.2955, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 0.4248410228656474, | |
| "grad_norm": 0.25262853503227234, | |
| "learning_rate": 9.947661383937563e-05, | |
| "loss": 1.284, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.4253822216208903, | |
| "grad_norm": 0.24780422449111938, | |
| "learning_rate": 9.94720608049925e-05, | |
| "loss": 1.3168, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 0.4259234203761331, | |
| "grad_norm": 0.2663845121860504, | |
| "learning_rate": 9.946748815753484e-05, | |
| "loss": 1.313, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 0.426464619131376, | |
| "grad_norm": 0.2906511425971985, | |
| "learning_rate": 9.946289589881545e-05, | |
| "loss": 1.3197, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 0.42700581788661884, | |
| "grad_norm": 0.28401264548301697, | |
| "learning_rate": 9.945828403065493e-05, | |
| "loss": 1.3254, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 0.42754701664186173, | |
| "grad_norm": 0.27820122241973877, | |
| "learning_rate": 9.945365255488164e-05, | |
| "loss": 1.3153, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.42808821539710457, | |
| "grad_norm": 0.2573559880256653, | |
| "learning_rate": 9.944900147333173e-05, | |
| "loss": 1.3144, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 0.42862941415234745, | |
| "grad_norm": 0.2536357343196869, | |
| "learning_rate": 9.944433078784909e-05, | |
| "loss": 1.3172, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 0.4291706129075903, | |
| "grad_norm": 0.2745160758495331, | |
| "learning_rate": 9.94396405002854e-05, | |
| "loss": 1.3023, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 0.4297118116628332, | |
| "grad_norm": 0.290393203496933, | |
| "learning_rate": 9.943493061250013e-05, | |
| "loss": 1.3095, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 0.43025301041807607, | |
| "grad_norm": 0.29357218742370605, | |
| "learning_rate": 9.94302011263605e-05, | |
| "loss": 1.3232, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.4307942091733189, | |
| "grad_norm": 0.2756180167198181, | |
| "learning_rate": 9.94254520437415e-05, | |
| "loss": 1.3179, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 0.4313354079285618, | |
| "grad_norm": 0.30225417017936707, | |
| "learning_rate": 9.942068336652589e-05, | |
| "loss": 1.3353, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 0.4318766066838046, | |
| "grad_norm": 0.26694637537002563, | |
| "learning_rate": 9.94158950966042e-05, | |
| "loss": 1.318, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 0.4324178054390475, | |
| "grad_norm": 0.2528863549232483, | |
| "learning_rate": 9.941108723587471e-05, | |
| "loss": 1.3282, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 0.43295900419429034, | |
| "grad_norm": 0.25261232256889343, | |
| "learning_rate": 9.940625978624353e-05, | |
| "loss": 1.3178, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.43350020294953323, | |
| "grad_norm": 0.2624775767326355, | |
| "learning_rate": 9.940141274962444e-05, | |
| "loss": 1.31, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 0.43404140170477606, | |
| "grad_norm": 0.260810524225235, | |
| "learning_rate": 9.939654612793908e-05, | |
| "loss": 1.3162, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 0.43458260046001895, | |
| "grad_norm": 0.2815745174884796, | |
| "learning_rate": 9.939165992311676e-05, | |
| "loss": 1.3112, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 0.4351237992152618, | |
| "grad_norm": 0.2773973345756531, | |
| "learning_rate": 9.938675413709466e-05, | |
| "loss": 1.3, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 0.4356649979705047, | |
| "grad_norm": 0.26486915349960327, | |
| "learning_rate": 9.938182877181763e-05, | |
| "loss": 1.3193, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.4362061967257475, | |
| "grad_norm": 0.26103830337524414, | |
| "learning_rate": 9.937688382923832e-05, | |
| "loss": 1.3244, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 0.4367473954809904, | |
| "grad_norm": 0.2556493878364563, | |
| "learning_rate": 9.937191931131716e-05, | |
| "loss": 1.3087, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 0.43728859423623323, | |
| "grad_norm": 0.2739090919494629, | |
| "learning_rate": 9.93669352200223e-05, | |
| "loss": 1.3009, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 0.4378297929914761, | |
| "grad_norm": 0.26297444105148315, | |
| "learning_rate": 9.936193155732967e-05, | |
| "loss": 1.2971, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 0.438370991746719, | |
| "grad_norm": 0.2587411403656006, | |
| "learning_rate": 9.935690832522297e-05, | |
| "loss": 1.3259, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.43891219050196184, | |
| "grad_norm": 0.2419731616973877, | |
| "learning_rate": 9.935186552569366e-05, | |
| "loss": 1.3123, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 0.43945338925720473, | |
| "grad_norm": 0.27424389123916626, | |
| "learning_rate": 9.934680316074092e-05, | |
| "loss": 1.3196, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 0.43999458801244756, | |
| "grad_norm": 0.258242666721344, | |
| "learning_rate": 9.934172123237173e-05, | |
| "loss": 1.3044, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 0.44053578676769045, | |
| "grad_norm": 0.2621035575866699, | |
| "learning_rate": 9.933661974260078e-05, | |
| "loss": 1.3111, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 0.4410769855229333, | |
| "grad_norm": 0.25349390506744385, | |
| "learning_rate": 9.93314986934506e-05, | |
| "loss": 1.3025, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.4416181842781762, | |
| "grad_norm": 0.2615620195865631, | |
| "learning_rate": 9.932635808695136e-05, | |
| "loss": 1.3291, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 0.442159383033419, | |
| "grad_norm": 0.2933880686759949, | |
| "learning_rate": 9.932119792514105e-05, | |
| "loss": 1.3327, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 0.4427005817886619, | |
| "grad_norm": 0.2584700286388397, | |
| "learning_rate": 9.931601821006544e-05, | |
| "loss": 1.3031, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 0.44324178054390473, | |
| "grad_norm": 0.2718084156513214, | |
| "learning_rate": 9.931081894377797e-05, | |
| "loss": 1.3053, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 0.4437829792991476, | |
| "grad_norm": 0.27105703949928284, | |
| "learning_rate": 9.93056001283399e-05, | |
| "loss": 1.3012, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.44432417805439045, | |
| "grad_norm": 0.27265292406082153, | |
| "learning_rate": 9.930036176582021e-05, | |
| "loss": 1.2957, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 0.44486537680963334, | |
| "grad_norm": 0.26121169328689575, | |
| "learning_rate": 9.929510385829564e-05, | |
| "loss": 1.3062, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 0.44540657556487623, | |
| "grad_norm": 0.26841971278190613, | |
| "learning_rate": 9.928982640785067e-05, | |
| "loss": 1.3192, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 0.44594777432011906, | |
| "grad_norm": 0.27634862065315247, | |
| "learning_rate": 9.928452941657755e-05, | |
| "loss": 1.3005, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 0.44648897307536195, | |
| "grad_norm": 0.25527122616767883, | |
| "learning_rate": 9.927921288657623e-05, | |
| "loss": 1.3121, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.4470301718306048, | |
| "grad_norm": 0.2733294665813446, | |
| "learning_rate": 9.927387681995443e-05, | |
| "loss": 1.3051, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 0.44757137058584767, | |
| "grad_norm": 0.2783257067203522, | |
| "learning_rate": 9.926852121882766e-05, | |
| "loss": 1.2947, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 0.4481125693410905, | |
| "grad_norm": 0.2672583758831024, | |
| "learning_rate": 9.926314608531911e-05, | |
| "loss": 1.3272, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 0.4486537680963334, | |
| "grad_norm": 0.2568219304084778, | |
| "learning_rate": 9.925775142155974e-05, | |
| "loss": 1.3025, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 0.4491949668515762, | |
| "grad_norm": 0.2576539218425751, | |
| "learning_rate": 9.925233722968826e-05, | |
| "loss": 1.2715, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.4497361656068191, | |
| "grad_norm": 0.25898897647857666, | |
| "learning_rate": 9.924690351185109e-05, | |
| "loss": 1.3039, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 0.45027736436206195, | |
| "grad_norm": 0.25795668363571167, | |
| "learning_rate": 9.924145027020242e-05, | |
| "loss": 1.3115, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 0.45081856311730484, | |
| "grad_norm": 0.2781166136264801, | |
| "learning_rate": 9.92359775069042e-05, | |
| "loss": 1.3017, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 0.45135976187254767, | |
| "grad_norm": 0.2871512770652771, | |
| "learning_rate": 9.923048522412608e-05, | |
| "loss": 1.3206, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 0.45190096062779056, | |
| "grad_norm": 0.27760595083236694, | |
| "learning_rate": 9.922497342404544e-05, | |
| "loss": 1.3214, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.4524421593830334, | |
| "grad_norm": 0.26959067583084106, | |
| "learning_rate": 9.921944210884746e-05, | |
| "loss": 1.3144, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 0.4529833581382763, | |
| "grad_norm": 0.2662011384963989, | |
| "learning_rate": 9.921389128072498e-05, | |
| "loss": 1.3022, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 0.45352455689351917, | |
| "grad_norm": 0.28014811873435974, | |
| "learning_rate": 9.920832094187861e-05, | |
| "loss": 1.3104, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 0.454065755648762, | |
| "grad_norm": 0.2560974955558777, | |
| "learning_rate": 9.920273109451673e-05, | |
| "loss": 1.3113, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 0.4546069544040049, | |
| "grad_norm": 0.285339891910553, | |
| "learning_rate": 9.91971217408554e-05, | |
| "loss": 1.3126, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.4551481531592477, | |
| "grad_norm": 0.29105204343795776, | |
| "learning_rate": 9.919149288311843e-05, | |
| "loss": 1.3248, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 0.4556893519144906, | |
| "grad_norm": 0.2868146002292633, | |
| "learning_rate": 9.918584452353739e-05, | |
| "loss": 1.3217, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 0.45623055066973345, | |
| "grad_norm": 0.26717278361320496, | |
| "learning_rate": 9.918017666435152e-05, | |
| "loss": 1.2991, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 0.45677174942497634, | |
| "grad_norm": 0.2560403048992157, | |
| "learning_rate": 9.917448930780786e-05, | |
| "loss": 1.3091, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 0.45731294818021917, | |
| "grad_norm": 0.2610042989253998, | |
| "learning_rate": 9.916878245616114e-05, | |
| "loss": 1.2948, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.45785414693546206, | |
| "grad_norm": 0.27322304248809814, | |
| "learning_rate": 9.916305611167382e-05, | |
| "loss": 1.3121, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 0.4583953456907049, | |
| "grad_norm": 0.26559844613075256, | |
| "learning_rate": 9.91573102766161e-05, | |
| "loss": 1.307, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 0.4589365444459478, | |
| "grad_norm": 0.2677384316921234, | |
| "learning_rate": 9.91515449532659e-05, | |
| "loss": 1.2925, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 0.4594777432011906, | |
| "grad_norm": 0.2670448422431946, | |
| "learning_rate": 9.914576014390888e-05, | |
| "loss": 1.3051, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 0.4600189419564335, | |
| "grad_norm": 0.2537919878959656, | |
| "learning_rate": 9.91399558508384e-05, | |
| "loss": 1.3047, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.46056014071167634, | |
| "grad_norm": 0.2712916433811188, | |
| "learning_rate": 9.913413207635555e-05, | |
| "loss": 1.2949, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 0.4611013394669192, | |
| "grad_norm": 0.27910125255584717, | |
| "learning_rate": 9.912828882276917e-05, | |
| "loss": 1.336, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 0.4616425382221621, | |
| "grad_norm": 0.25917065143585205, | |
| "learning_rate": 9.91224260923958e-05, | |
| "loss": 1.2938, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 0.46218373697740495, | |
| "grad_norm": 0.265024334192276, | |
| "learning_rate": 9.91165438875597e-05, | |
| "loss": 1.2876, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 0.46272493573264784, | |
| "grad_norm": 0.2637651860713959, | |
| "learning_rate": 9.911064221059286e-05, | |
| "loss": 1.3128, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.46326613448789067, | |
| "grad_norm": 0.25448864698410034, | |
| "learning_rate": 9.910472106383495e-05, | |
| "loss": 1.3289, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 0.46380733324313356, | |
| "grad_norm": 0.24903124570846558, | |
| "learning_rate": 9.909878044963346e-05, | |
| "loss": 1.3013, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 0.4643485319983764, | |
| "grad_norm": 0.258848637342453, | |
| "learning_rate": 9.909282037034347e-05, | |
| "loss": 1.3052, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 0.4648897307536193, | |
| "grad_norm": 0.25806304812431335, | |
| "learning_rate": 9.908684082832787e-05, | |
| "loss": 1.286, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 0.4654309295088621, | |
| "grad_norm": 0.26794132590293884, | |
| "learning_rate": 9.908084182595723e-05, | |
| "loss": 1.3069, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.465972128264105, | |
| "grad_norm": 0.26079118251800537, | |
| "learning_rate": 9.907482336560983e-05, | |
| "loss": 1.3145, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 0.46651332701934783, | |
| "grad_norm": 0.25958481431007385, | |
| "learning_rate": 9.906878544967169e-05, | |
| "loss": 1.3098, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 0.4670545257745907, | |
| "grad_norm": 0.2390812784433365, | |
| "learning_rate": 9.906272808053652e-05, | |
| "loss": 1.3085, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 0.46759572452983356, | |
| "grad_norm": 0.263637900352478, | |
| "learning_rate": 9.905665126060574e-05, | |
| "loss": 1.2933, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 0.46813692328507645, | |
| "grad_norm": 0.2462746798992157, | |
| "learning_rate": 9.90505549922885e-05, | |
| "loss": 1.2877, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.46867812204031933, | |
| "grad_norm": 0.244845911860466, | |
| "learning_rate": 9.904443927800164e-05, | |
| "loss": 1.325, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 0.46921932079556217, | |
| "grad_norm": 0.28249332308769226, | |
| "learning_rate": 9.903830412016974e-05, | |
| "loss": 1.313, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 0.46976051955080506, | |
| "grad_norm": 0.29556336998939514, | |
| "learning_rate": 9.903214952122504e-05, | |
| "loss": 1.3142, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 0.4703017183060479, | |
| "grad_norm": 0.2746431827545166, | |
| "learning_rate": 9.902597548360754e-05, | |
| "loss": 1.3096, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 0.4708429170612908, | |
| "grad_norm": 0.2979538142681122, | |
| "learning_rate": 9.901978200976492e-05, | |
| "loss": 1.2849, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.4713841158165336, | |
| "grad_norm": 0.2766527235507965, | |
| "learning_rate": 9.901356910215255e-05, | |
| "loss": 1.3089, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 0.4719253145717765, | |
| "grad_norm": 0.25000783801078796, | |
| "learning_rate": 9.900733676323353e-05, | |
| "loss": 1.308, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 0.47246651332701933, | |
| "grad_norm": 0.26226234436035156, | |
| "learning_rate": 9.900108499547864e-05, | |
| "loss": 1.3041, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 0.4730077120822622, | |
| "grad_norm": 0.2794544994831085, | |
| "learning_rate": 9.899481380136642e-05, | |
| "loss": 1.3312, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 0.47354891083750505, | |
| "grad_norm": 0.24771127104759216, | |
| "learning_rate": 9.898852318338303e-05, | |
| "loss": 1.2853, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.47409010959274794, | |
| "grad_norm": 0.2811632752418518, | |
| "learning_rate": 9.898221314402238e-05, | |
| "loss": 1.3019, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 0.4746313083479908, | |
| "grad_norm": 0.2812533378601074, | |
| "learning_rate": 9.897588368578608e-05, | |
| "loss": 1.3298, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 0.47517250710323367, | |
| "grad_norm": 0.25955653190612793, | |
| "learning_rate": 9.896953481118341e-05, | |
| "loss": 1.3093, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 0.4757137058584765, | |
| "grad_norm": 0.2653108537197113, | |
| "learning_rate": 9.896316652273136e-05, | |
| "loss": 1.2898, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 0.4762549046137194, | |
| "grad_norm": 0.27985796332359314, | |
| "learning_rate": 9.895677882295466e-05, | |
| "loss": 1.2928, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.4767961033689623, | |
| "grad_norm": 0.2889133393764496, | |
| "learning_rate": 9.895037171438568e-05, | |
| "loss": 1.3088, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 0.4773373021242051, | |
| "grad_norm": 0.2615009546279907, | |
| "learning_rate": 9.894394519956448e-05, | |
| "loss": 1.3212, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 0.477878500879448, | |
| "grad_norm": 0.24938960373401642, | |
| "learning_rate": 9.893749928103885e-05, | |
| "loss": 1.2982, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 0.47841969963469083, | |
| "grad_norm": 0.27132853865623474, | |
| "learning_rate": 9.893103396136427e-05, | |
| "loss": 1.294, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 0.4789608983899337, | |
| "grad_norm": 0.2632822096347809, | |
| "learning_rate": 9.89245492431039e-05, | |
| "loss": 1.2852, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.47950209714517655, | |
| "grad_norm": 0.27269670367240906, | |
| "learning_rate": 9.891804512882856e-05, | |
| "loss": 1.2934, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 0.48004329590041944, | |
| "grad_norm": 0.2572595179080963, | |
| "learning_rate": 9.891152162111683e-05, | |
| "loss": 1.2719, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 0.4805844946556623, | |
| "grad_norm": 0.2708267867565155, | |
| "learning_rate": 9.890497872255489e-05, | |
| "loss": 1.2907, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 0.48112569341090516, | |
| "grad_norm": 0.28407028317451477, | |
| "learning_rate": 9.889841643573671e-05, | |
| "loss": 1.2977, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 0.481666892166148, | |
| "grad_norm": 0.26248103380203247, | |
| "learning_rate": 9.889183476326386e-05, | |
| "loss": 1.2993, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.4822080909213909, | |
| "grad_norm": 0.26148512959480286, | |
| "learning_rate": 9.888523370774563e-05, | |
| "loss": 1.2893, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 0.4827492896766337, | |
| "grad_norm": 0.2815425395965576, | |
| "learning_rate": 9.8878613271799e-05, | |
| "loss": 1.3015, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 0.4832904884318766, | |
| "grad_norm": 0.26061713695526123, | |
| "learning_rate": 9.887197345804862e-05, | |
| "loss": 1.2781, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 0.4838316871871195, | |
| "grad_norm": 0.2641533613204956, | |
| "learning_rate": 9.886531426912683e-05, | |
| "loss": 1.2993, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 0.48437288594236233, | |
| "grad_norm": 0.25920137763023376, | |
| "learning_rate": 9.885863570767364e-05, | |
| "loss": 1.2955, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.4849140846976052, | |
| "grad_norm": 0.24002158641815186, | |
| "learning_rate": 9.885193777633676e-05, | |
| "loss": 1.2932, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 0.48545528345284805, | |
| "grad_norm": 0.2643393576145172, | |
| "learning_rate": 9.884522047777157e-05, | |
| "loss": 1.2963, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 0.48599648220809094, | |
| "grad_norm": 0.2522197663784027, | |
| "learning_rate": 9.883848381464112e-05, | |
| "loss": 1.2947, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 0.4865376809633338, | |
| "grad_norm": 0.2431286871433258, | |
| "learning_rate": 9.883172778961613e-05, | |
| "loss": 1.3112, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 0.48707887971857666, | |
| "grad_norm": 0.26892608404159546, | |
| "learning_rate": 9.882495240537505e-05, | |
| "loss": 1.2904, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.4876200784738195, | |
| "grad_norm": 0.2528528571128845, | |
| "learning_rate": 9.881815766460392e-05, | |
| "loss": 1.2949, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 0.4881612772290624, | |
| "grad_norm": 0.2614927291870117, | |
| "learning_rate": 9.881134356999652e-05, | |
| "loss": 1.288, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 0.4887024759843052, | |
| "grad_norm": 0.2523605227470398, | |
| "learning_rate": 9.880451012425426e-05, | |
| "loss": 1.3029, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 0.4892436747395481, | |
| "grad_norm": 0.24303248524665833, | |
| "learning_rate": 9.879765733008627e-05, | |
| "loss": 1.3107, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 0.48978487349479094, | |
| "grad_norm": 0.2470557987689972, | |
| "learning_rate": 9.879078519020933e-05, | |
| "loss": 1.2856, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.49032607225003383, | |
| "grad_norm": 0.2526317536830902, | |
| "learning_rate": 9.878389370734784e-05, | |
| "loss": 1.2965, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 0.49086727100527666, | |
| "grad_norm": 0.2483314871788025, | |
| "learning_rate": 9.877698288423394e-05, | |
| "loss": 1.3016, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 0.49140846976051955, | |
| "grad_norm": 0.24746839702129364, | |
| "learning_rate": 9.877005272360741e-05, | |
| "loss": 1.2944, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 0.49194966851576244, | |
| "grad_norm": 0.24739988148212433, | |
| "learning_rate": 9.876310322821568e-05, | |
| "loss": 1.3037, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 0.4924908672710053, | |
| "grad_norm": 0.2740204632282257, | |
| "learning_rate": 9.875613440081387e-05, | |
| "loss": 1.3116, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.49303206602624816, | |
| "grad_norm": 0.27116379141807556, | |
| "learning_rate": 9.874914624416475e-05, | |
| "loss": 1.288, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 0.493573264781491, | |
| "grad_norm": 0.24231554567813873, | |
| "learning_rate": 9.874213876103878e-05, | |
| "loss": 1.2975, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 0.4941144635367339, | |
| "grad_norm": 0.2590995728969574, | |
| "learning_rate": 9.873511195421402e-05, | |
| "loss": 1.2678, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 0.4946556622919767, | |
| "grad_norm": 0.25694531202316284, | |
| "learning_rate": 9.872806582647625e-05, | |
| "loss": 1.28, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 0.4951968610472196, | |
| "grad_norm": 0.25455620884895325, | |
| "learning_rate": 9.87210003806189e-05, | |
| "loss": 1.2942, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.49573805980246244, | |
| "grad_norm": 0.2639889121055603, | |
| "learning_rate": 9.871391561944302e-05, | |
| "loss": 1.3161, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 0.4962792585577053, | |
| "grad_norm": 0.271282821893692, | |
| "learning_rate": 9.870681154575737e-05, | |
| "loss": 1.3071, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 0.49682045731294816, | |
| "grad_norm": 0.26479372382164, | |
| "learning_rate": 9.869968816237833e-05, | |
| "loss": 1.2841, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 0.49736165606819105, | |
| "grad_norm": 0.26040130853652954, | |
| "learning_rate": 9.869254547212997e-05, | |
| "loss": 1.2989, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 0.4979028548234339, | |
| "grad_norm": 0.26563623547554016, | |
| "learning_rate": 9.868538347784396e-05, | |
| "loss": 1.2965, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.49844405357867677, | |
| "grad_norm": 0.26089224219322205, | |
| "learning_rate": 9.867820218235969e-05, | |
| "loss": 1.3071, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 0.4989852523339196, | |
| "grad_norm": 0.27151811122894287, | |
| "learning_rate": 9.867100158852412e-05, | |
| "loss": 1.287, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 0.4995264510891625, | |
| "grad_norm": 0.2477792203426361, | |
| "learning_rate": 9.866378169919192e-05, | |
| "loss": 1.2894, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 0.5000676498444053, | |
| "grad_norm": 0.24871942400932312, | |
| "learning_rate": 9.865654251722545e-05, | |
| "loss": 1.3024, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 0.5006088485996483, | |
| "grad_norm": 0.26377877593040466, | |
| "learning_rate": 9.86492840454946e-05, | |
| "loss": 1.2939, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.5011500473548911, | |
| "grad_norm": 0.258228063583374, | |
| "learning_rate": 9.8642006286877e-05, | |
| "loss": 1.291, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 0.5016912461101339, | |
| "grad_norm": 0.26982301473617554, | |
| "learning_rate": 9.86347092442579e-05, | |
| "loss": 1.2845, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 0.5022324448653768, | |
| "grad_norm": 0.24094600975513458, | |
| "learning_rate": 9.862739292053021e-05, | |
| "loss": 1.2744, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 0.5027736436206197, | |
| "grad_norm": 0.25840380787849426, | |
| "learning_rate": 9.862005731859442e-05, | |
| "loss": 1.2966, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 0.5033148423758625, | |
| "grad_norm": 0.26734429597854614, | |
| "learning_rate": 9.861270244135877e-05, | |
| "loss": 1.2856, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.5038560411311054, | |
| "grad_norm": 0.24431397020816803, | |
| "learning_rate": 9.860532829173903e-05, | |
| "loss": 1.2871, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 0.5043972398863482, | |
| "grad_norm": 0.25425857305526733, | |
| "learning_rate": 9.859793487265869e-05, | |
| "loss": 1.2822, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 0.5049384386415912, | |
| "grad_norm": 0.25332111120224, | |
| "learning_rate": 9.859052218704885e-05, | |
| "loss": 1.2723, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 0.505479637396834, | |
| "grad_norm": 0.24775418639183044, | |
| "learning_rate": 9.858309023784826e-05, | |
| "loss": 1.2934, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 0.5060208361520768, | |
| "grad_norm": 0.24880458414554596, | |
| "learning_rate": 9.857563902800328e-05, | |
| "loss": 1.3041, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.5065620349073197, | |
| "grad_norm": 0.2574135959148407, | |
| "learning_rate": 9.856816856046793e-05, | |
| "loss": 1.2855, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 0.5071032336625626, | |
| "grad_norm": 0.26873350143432617, | |
| "learning_rate": 9.856067883820386e-05, | |
| "loss": 1.3055, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 0.5076444324178054, | |
| "grad_norm": 0.23742420971393585, | |
| "learning_rate": 9.855316986418036e-05, | |
| "loss": 1.3029, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 0.5081856311730483, | |
| "grad_norm": 0.2398921549320221, | |
| "learning_rate": 9.854564164137432e-05, | |
| "loss": 1.2849, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 0.5087268299282912, | |
| "grad_norm": 0.25182288885116577, | |
| "learning_rate": 9.85380941727703e-05, | |
| "loss": 1.2981, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.509268028683534, | |
| "grad_norm": 0.23373378813266754, | |
| "learning_rate": 9.853052746136048e-05, | |
| "loss": 1.2772, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 0.5098092274387769, | |
| "grad_norm": 0.2581213712692261, | |
| "learning_rate": 9.852294151014466e-05, | |
| "loss": 1.3147, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 0.5103504261940197, | |
| "grad_norm": 0.26642751693725586, | |
| "learning_rate": 9.851533632213028e-05, | |
| "loss": 1.2885, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 0.5108916249492627, | |
| "grad_norm": 0.24029181897640228, | |
| "learning_rate": 9.850771190033237e-05, | |
| "loss": 1.297, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 0.5114328237045055, | |
| "grad_norm": 0.2555221915245056, | |
| "learning_rate": 9.850006824777364e-05, | |
| "loss": 1.284, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.5119740224597483, | |
| "grad_norm": 0.2723660171031952, | |
| "learning_rate": 9.849240536748439e-05, | |
| "loss": 1.2821, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 0.5125152212149912, | |
| "grad_norm": 0.24772705137729645, | |
| "learning_rate": 9.848472326250253e-05, | |
| "loss": 1.2743, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 0.5130564199702341, | |
| "grad_norm": 0.2344834804534912, | |
| "learning_rate": 9.847702193587365e-05, | |
| "loss": 1.286, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 0.5135976187254769, | |
| "grad_norm": 0.23948362469673157, | |
| "learning_rate": 9.846930139065088e-05, | |
| "loss": 1.2673, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 0.5141388174807198, | |
| "grad_norm": 0.27207908034324646, | |
| "learning_rate": 9.846156162989503e-05, | |
| "loss": 1.3041, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.5146800162359627, | |
| "grad_norm": 0.2407965511083603, | |
| "learning_rate": 9.845380265667454e-05, | |
| "loss": 1.2875, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 0.5152212149912055, | |
| "grad_norm": 0.2517203688621521, | |
| "learning_rate": 9.844602447406538e-05, | |
| "loss": 1.2855, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 0.5157624137464484, | |
| "grad_norm": 0.24267178773880005, | |
| "learning_rate": 9.843822708515123e-05, | |
| "loss": 1.2711, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 0.5163036125016912, | |
| "grad_norm": 0.23933006823062897, | |
| "learning_rate": 9.843041049302331e-05, | |
| "loss": 1.3094, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 0.5168448112569342, | |
| "grad_norm": 0.21948301792144775, | |
| "learning_rate": 9.842257470078054e-05, | |
| "loss": 1.2686, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.517386010012177, | |
| "grad_norm": 0.239594966173172, | |
| "learning_rate": 9.841471971152933e-05, | |
| "loss": 1.2959, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 0.5179272087674198, | |
| "grad_norm": 0.26850634813308716, | |
| "learning_rate": 9.840684552838385e-05, | |
| "loss": 1.2969, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 0.5184684075226627, | |
| "grad_norm": 0.26066869497299194, | |
| "learning_rate": 9.839895215446573e-05, | |
| "loss": 1.2935, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 0.5190096062779056, | |
| "grad_norm": 0.25288596749305725, | |
| "learning_rate": 9.839103959290433e-05, | |
| "loss": 1.2922, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 0.5195508050331484, | |
| "grad_norm": 0.24453966319561005, | |
| "learning_rate": 9.838310784683655e-05, | |
| "loss": 1.3058, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.5200920037883913, | |
| "grad_norm": 0.25353509187698364, | |
| "learning_rate": 9.837515691940689e-05, | |
| "loss": 1.3161, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 0.5206332025436341, | |
| "grad_norm": 0.24898375570774078, | |
| "learning_rate": 9.836718681376749e-05, | |
| "loss": 1.2925, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 0.521174401298877, | |
| "grad_norm": 0.2576977014541626, | |
| "learning_rate": 9.835919753307807e-05, | |
| "loss": 1.2916, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 0.5217156000541199, | |
| "grad_norm": 0.25432518124580383, | |
| "learning_rate": 9.8351189080506e-05, | |
| "loss": 1.2866, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 0.5222567988093627, | |
| "grad_norm": 0.2504200339317322, | |
| "learning_rate": 9.834316145922615e-05, | |
| "loss": 1.2728, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.5227979975646057, | |
| "grad_norm": 0.2627692222595215, | |
| "learning_rate": 9.83351146724211e-05, | |
| "loss": 1.2853, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 0.5233391963198485, | |
| "grad_norm": 0.2776716351509094, | |
| "learning_rate": 9.832704872328094e-05, | |
| "loss": 1.2881, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 0.5238803950750913, | |
| "grad_norm": 0.24669450521469116, | |
| "learning_rate": 9.831896361500344e-05, | |
| "loss": 1.2681, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 0.5244215938303342, | |
| "grad_norm": 0.24949464201927185, | |
| "learning_rate": 9.831085935079387e-05, | |
| "loss": 1.2851, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 0.5249627925855771, | |
| "grad_norm": 0.2585392892360687, | |
| "learning_rate": 9.830273593386518e-05, | |
| "loss": 1.2796, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.5255039913408199, | |
| "grad_norm": 0.26086801290512085, | |
| "learning_rate": 9.829459336743787e-05, | |
| "loss": 1.293, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 0.5260451900960628, | |
| "grad_norm": 0.25490057468414307, | |
| "learning_rate": 9.828643165474006e-05, | |
| "loss": 1.2824, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 0.5265863888513056, | |
| "grad_norm": 0.24865177273750305, | |
| "learning_rate": 9.827825079900739e-05, | |
| "loss": 1.2835, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 0.5271275876065485, | |
| "grad_norm": 0.25498902797698975, | |
| "learning_rate": 9.827005080348317e-05, | |
| "loss": 1.2931, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 0.5276687863617914, | |
| "grad_norm": 0.2585375905036926, | |
| "learning_rate": 9.826183167141828e-05, | |
| "loss": 1.2659, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.5282099851170342, | |
| "grad_norm": 0.2300305813550949, | |
| "learning_rate": 9.825359340607116e-05, | |
| "loss": 1.3019, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 0.528751183872277, | |
| "grad_norm": 0.24674038589000702, | |
| "learning_rate": 9.824533601070784e-05, | |
| "loss": 1.2784, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 0.52929238262752, | |
| "grad_norm": 0.23458759486675262, | |
| "learning_rate": 9.823705948860195e-05, | |
| "loss": 1.2779, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 0.5298335813827628, | |
| "grad_norm": 0.24736309051513672, | |
| "learning_rate": 9.822876384303472e-05, | |
| "loss": 1.3083, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 0.5303747801380057, | |
| "grad_norm": 0.25108450651168823, | |
| "learning_rate": 9.82204490772949e-05, | |
| "loss": 1.3044, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.5309159788932486, | |
| "grad_norm": 0.23308375477790833, | |
| "learning_rate": 9.82121151946789e-05, | |
| "loss": 1.2694, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 0.5314571776484914, | |
| "grad_norm": 0.2283206284046173, | |
| "learning_rate": 9.820376219849064e-05, | |
| "loss": 1.2735, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 0.5319983764037343, | |
| "grad_norm": 0.24121573567390442, | |
| "learning_rate": 9.819539009204164e-05, | |
| "loss": 1.2799, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 0.5325395751589771, | |
| "grad_norm": 0.24135661125183105, | |
| "learning_rate": 9.8186998878651e-05, | |
| "loss": 1.295, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 0.53308077391422, | |
| "grad_norm": 0.24390241503715515, | |
| "learning_rate": 9.817858856164542e-05, | |
| "loss": 1.2812, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.5336219726694629, | |
| "grad_norm": 0.24739502370357513, | |
| "learning_rate": 9.817015914435913e-05, | |
| "loss": 1.2872, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 0.5341631714247057, | |
| "grad_norm": 0.25517916679382324, | |
| "learning_rate": 9.816171063013395e-05, | |
| "loss": 1.2718, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 0.5347043701799485, | |
| "grad_norm": 0.25479528307914734, | |
| "learning_rate": 9.815324302231928e-05, | |
| "loss": 1.2952, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 0.5352455689351915, | |
| "grad_norm": 0.24998174607753754, | |
| "learning_rate": 9.814475632427206e-05, | |
| "loss": 1.2914, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 0.5357867676904343, | |
| "grad_norm": 0.2341603934764862, | |
| "learning_rate": 9.813625053935686e-05, | |
| "loss": 1.2793, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.5363279664456772, | |
| "grad_norm": 0.23716285824775696, | |
| "learning_rate": 9.812772567094574e-05, | |
| "loss": 1.2872, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 0.53686916520092, | |
| "grad_norm": 0.2324230819940567, | |
| "learning_rate": 9.81191817224184e-05, | |
| "loss": 1.2604, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 0.5374103639561629, | |
| "grad_norm": 0.24399405717849731, | |
| "learning_rate": 9.811061869716205e-05, | |
| "loss": 1.2972, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 0.5379515627114058, | |
| "grad_norm": 0.24572497606277466, | |
| "learning_rate": 9.810203659857145e-05, | |
| "loss": 1.2784, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 0.5384927614666486, | |
| "grad_norm": 0.22993844747543335, | |
| "learning_rate": 9.8093435430049e-05, | |
| "loss": 1.2886, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.5390339602218915, | |
| "grad_norm": 0.24518661201000214, | |
| "learning_rate": 9.808481519500458e-05, | |
| "loss": 1.2622, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 0.5395751589771344, | |
| "grad_norm": 0.2601888179779053, | |
| "learning_rate": 9.807617589685568e-05, | |
| "loss": 1.2739, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 0.5401163577323772, | |
| "grad_norm": 0.24736261367797852, | |
| "learning_rate": 9.80675175390273e-05, | |
| "loss": 1.2748, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 0.54065755648762, | |
| "grad_norm": 0.2332574725151062, | |
| "learning_rate": 9.805884012495203e-05, | |
| "loss": 1.2639, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 0.541198755242863, | |
| "grad_norm": 0.2662294805049896, | |
| "learning_rate": 9.805014365807004e-05, | |
| "loss": 1.2914, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.5417399539981058, | |
| "grad_norm": 0.28600943088531494, | |
| "learning_rate": 9.804142814182902e-05, | |
| "loss": 1.2657, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 0.5422811527533486, | |
| "grad_norm": 0.2814892530441284, | |
| "learning_rate": 9.803269357968416e-05, | |
| "loss": 1.2839, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 0.5428223515085915, | |
| "grad_norm": 0.24939605593681335, | |
| "learning_rate": 9.802393997509833e-05, | |
| "loss": 1.2692, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 0.5433635502638344, | |
| "grad_norm": 0.2562806308269501, | |
| "learning_rate": 9.801516733154181e-05, | |
| "loss": 1.291, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 0.5439047490190773, | |
| "grad_norm": 0.2617442011833191, | |
| "learning_rate": 9.800637565249255e-05, | |
| "loss": 1.2808, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.5444459477743201, | |
| "grad_norm": 0.2421412616968155, | |
| "learning_rate": 9.799756494143593e-05, | |
| "loss": 1.2733, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 0.5449871465295629, | |
| "grad_norm": 0.25231024622917175, | |
| "learning_rate": 9.798873520186497e-05, | |
| "loss": 1.2695, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 0.5455283452848059, | |
| "grad_norm": 0.25108659267425537, | |
| "learning_rate": 9.79798864372802e-05, | |
| "loss": 1.298, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 0.5460695440400487, | |
| "grad_norm": 0.24615678191184998, | |
| "learning_rate": 9.79710186511897e-05, | |
| "loss": 1.3127, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 0.5466107427952915, | |
| "grad_norm": 0.23436503112316132, | |
| "learning_rate": 9.796213184710904e-05, | |
| "loss": 1.2896, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.5471519415505345, | |
| "grad_norm": 0.23453901708126068, | |
| "learning_rate": 9.79532260285614e-05, | |
| "loss": 1.2761, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 0.5476931403057773, | |
| "grad_norm": 0.2413233071565628, | |
| "learning_rate": 9.794430119907748e-05, | |
| "loss": 1.2744, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 0.5482343390610201, | |
| "grad_norm": 0.2426893562078476, | |
| "learning_rate": 9.793535736219546e-05, | |
| "loss": 1.2615, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 0.548775537816263, | |
| "grad_norm": 0.23853014409542084, | |
| "learning_rate": 9.792639452146115e-05, | |
| "loss": 1.2897, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 0.5493167365715059, | |
| "grad_norm": 0.24866445362567902, | |
| "learning_rate": 9.791741268042784e-05, | |
| "loss": 1.2957, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.5498579353267488, | |
| "grad_norm": 0.24467822909355164, | |
| "learning_rate": 9.790841184265633e-05, | |
| "loss": 1.2867, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 0.5503991340819916, | |
| "grad_norm": 0.2393324077129364, | |
| "learning_rate": 9.7899392011715e-05, | |
| "loss": 1.3061, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 0.5509403328372344, | |
| "grad_norm": 0.23834531009197235, | |
| "learning_rate": 9.789035319117974e-05, | |
| "loss": 1.2957, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 0.5514815315924774, | |
| "grad_norm": 0.2603852450847626, | |
| "learning_rate": 9.788129538463397e-05, | |
| "loss": 1.2897, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 0.5520227303477202, | |
| "grad_norm": 0.26540425419807434, | |
| "learning_rate": 9.787221859566861e-05, | |
| "loss": 1.2829, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.552563929102963, | |
| "grad_norm": 0.25125250220298767, | |
| "learning_rate": 9.786312282788216e-05, | |
| "loss": 1.2708, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 0.553105127858206, | |
| "grad_norm": 0.23911471664905548, | |
| "learning_rate": 9.785400808488061e-05, | |
| "loss": 1.2949, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 0.5536463266134488, | |
| "grad_norm": 0.23871150612831116, | |
| "learning_rate": 9.784487437027746e-05, | |
| "loss": 1.2863, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 0.5541875253686916, | |
| "grad_norm": 0.25253376364707947, | |
| "learning_rate": 9.783572168769376e-05, | |
| "loss": 1.2797, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 0.5547287241239345, | |
| "grad_norm": 0.25140559673309326, | |
| "learning_rate": 9.782655004075807e-05, | |
| "loss": 1.2666, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.5552699228791774, | |
| "grad_norm": 0.25297242403030396, | |
| "learning_rate": 9.781735943310646e-05, | |
| "loss": 1.2935, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 0.5558111216344203, | |
| "grad_norm": 0.28536322712898254, | |
| "learning_rate": 9.780814986838252e-05, | |
| "loss": 1.2891, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 0.5563523203896631, | |
| "grad_norm": 0.28267911076545715, | |
| "learning_rate": 9.779892135023738e-05, | |
| "loss": 1.2846, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 0.5568935191449059, | |
| "grad_norm": 0.24850498139858246, | |
| "learning_rate": 9.778967388232964e-05, | |
| "loss": 1.2823, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 0.5574347179001489, | |
| "grad_norm": 0.4929364025592804, | |
| "learning_rate": 9.778040746832544e-05, | |
| "loss": 1.2681, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.5579759166553917, | |
| "grad_norm": 0.25423306226730347, | |
| "learning_rate": 9.777112211189843e-05, | |
| "loss": 1.2765, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 0.5585171154106345, | |
| "grad_norm": 0.23608753085136414, | |
| "learning_rate": 9.776181781672977e-05, | |
| "loss": 1.2756, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 0.5590583141658774, | |
| "grad_norm": 0.3117451071739197, | |
| "learning_rate": 9.775249458650812e-05, | |
| "loss": 1.2731, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 0.5595995129211203, | |
| "grad_norm": 0.2454603612422943, | |
| "learning_rate": 9.774315242492965e-05, | |
| "loss": 1.2821, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 0.5601407116763631, | |
| "grad_norm": 0.3214171528816223, | |
| "learning_rate": 9.773379133569804e-05, | |
| "loss": 1.2964, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.560681910431606, | |
| "grad_norm": 0.23589906096458435, | |
| "learning_rate": 9.772441132252448e-05, | |
| "loss": 1.2794, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 0.5612231091868489, | |
| "grad_norm": 0.23020370304584503, | |
| "learning_rate": 9.771501238912763e-05, | |
| "loss": 1.2753, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 0.5617643079420918, | |
| "grad_norm": 0.2368050515651703, | |
| "learning_rate": 9.77055945392337e-05, | |
| "loss": 1.3048, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 0.5623055066973346, | |
| "grad_norm": 0.2581866383552551, | |
| "learning_rate": 9.769615777657633e-05, | |
| "loss": 1.2765, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 0.5628467054525774, | |
| "grad_norm": 0.2481439858675003, | |
| "learning_rate": 9.768670210489675e-05, | |
| "loss": 1.2957, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.5633879042078204, | |
| "grad_norm": 0.2861919701099396, | |
| "learning_rate": 9.767722752794361e-05, | |
| "loss": 1.2647, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 0.5639291029630632, | |
| "grad_norm": 0.2552880346775055, | |
| "learning_rate": 9.766773404947309e-05, | |
| "loss": 1.2675, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 0.564470301718306, | |
| "grad_norm": 0.251891165971756, | |
| "learning_rate": 9.765822167324885e-05, | |
| "loss": 1.2799, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 0.5650115004735489, | |
| "grad_norm": 0.25395113229751587, | |
| "learning_rate": 9.764869040304205e-05, | |
| "loss": 1.2916, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 0.5655526992287918, | |
| "grad_norm": 0.2496347427368164, | |
| "learning_rate": 9.763914024263136e-05, | |
| "loss": 1.2722, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.5660938979840346, | |
| "grad_norm": 0.24722573161125183, | |
| "learning_rate": 9.762957119580287e-05, | |
| "loss": 1.2722, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 0.5666350967392775, | |
| "grad_norm": 0.23567502200603485, | |
| "learning_rate": 9.761998326635026e-05, | |
| "loss": 1.2681, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 0.5671762954945203, | |
| "grad_norm": 0.2396802455186844, | |
| "learning_rate": 9.76103764580746e-05, | |
| "loss": 1.2509, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 0.5677174942497633, | |
| "grad_norm": 0.24394263327121735, | |
| "learning_rate": 9.76007507747845e-05, | |
| "loss": 1.2863, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 0.5682586930050061, | |
| "grad_norm": 0.23184406757354736, | |
| "learning_rate": 9.759110622029604e-05, | |
| "loss": 1.2827, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.5687998917602489, | |
| "grad_norm": 0.24522008001804352, | |
| "learning_rate": 9.758144279843275e-05, | |
| "loss": 1.2716, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 0.5693410905154919, | |
| "grad_norm": 0.2323451191186905, | |
| "learning_rate": 9.757176051302573e-05, | |
| "loss": 1.2747, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 0.5698822892707347, | |
| "grad_norm": 0.26049986481666565, | |
| "learning_rate": 9.756205936791344e-05, | |
| "loss": 1.2976, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 0.5704234880259775, | |
| "grad_norm": 0.24207298457622528, | |
| "learning_rate": 9.75523393669419e-05, | |
| "loss": 1.2637, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 0.5709646867812204, | |
| "grad_norm": 0.23590189218521118, | |
| "learning_rate": 9.754260051396459e-05, | |
| "loss": 1.2725, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.5715058855364633, | |
| "grad_norm": 0.25714561343193054, | |
| "learning_rate": 9.753284281284243e-05, | |
| "loss": 1.2751, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 0.5720470842917061, | |
| "grad_norm": 0.2563743591308594, | |
| "learning_rate": 9.752306626744385e-05, | |
| "loss": 1.2552, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 0.572588283046949, | |
| "grad_norm": 0.23137059807777405, | |
| "learning_rate": 9.751327088164474e-05, | |
| "loss": 1.2826, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 0.5731294818021918, | |
| "grad_norm": 0.23491452634334564, | |
| "learning_rate": 9.750345665932844e-05, | |
| "loss": 1.2909, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 0.5736706805574348, | |
| "grad_norm": 0.23278982937335968, | |
| "learning_rate": 9.749362360438579e-05, | |
| "loss": 1.2904, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.5742118793126776, | |
| "grad_norm": 0.22500935196876526, | |
| "learning_rate": 9.748377172071508e-05, | |
| "loss": 1.2822, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 0.5747530780679204, | |
| "grad_norm": 0.23745082318782806, | |
| "learning_rate": 9.747390101222205e-05, | |
| "loss": 1.279, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 0.5752942768231633, | |
| "grad_norm": 0.24000118672847748, | |
| "learning_rate": 9.746401148281993e-05, | |
| "loss": 1.2806, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 0.5758354755784062, | |
| "grad_norm": 0.24468575417995453, | |
| "learning_rate": 9.74541031364294e-05, | |
| "loss": 1.2867, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 0.576376674333649, | |
| "grad_norm": 0.23120936751365662, | |
| "learning_rate": 9.744417597697859e-05, | |
| "loss": 1.2666, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.5769178730888919, | |
| "grad_norm": 0.25929006934165955, | |
| "learning_rate": 9.743423000840309e-05, | |
| "loss": 1.2672, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 0.5774590718441348, | |
| "grad_norm": 0.2530214786529541, | |
| "learning_rate": 9.742426523464598e-05, | |
| "loss": 1.2769, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 0.5780002705993776, | |
| "grad_norm": 0.2752387225627899, | |
| "learning_rate": 9.741428165965775e-05, | |
| "loss": 1.2562, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 0.5785414693546205, | |
| "grad_norm": 0.25394052267074585, | |
| "learning_rate": 9.740427928739638e-05, | |
| "loss": 1.2758, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 0.5790826681098633, | |
| "grad_norm": 0.25311702489852905, | |
| "learning_rate": 9.739425812182728e-05, | |
| "loss": 1.2603, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.5796238668651063, | |
| "grad_norm": 0.26108497381210327, | |
| "learning_rate": 9.738421816692329e-05, | |
| "loss": 1.2627, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 0.5801650656203491, | |
| "grad_norm": 0.2541772425174713, | |
| "learning_rate": 9.737415942666476e-05, | |
| "loss": 1.2752, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 0.5807062643755919, | |
| "grad_norm": 0.24984823167324066, | |
| "learning_rate": 9.736408190503943e-05, | |
| "loss": 1.2673, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 0.5812474631308348, | |
| "grad_norm": 0.2763904333114624, | |
| "learning_rate": 9.735398560604251e-05, | |
| "loss": 1.2936, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 0.5817886618860777, | |
| "grad_norm": 0.26247066259384155, | |
| "learning_rate": 9.734387053367669e-05, | |
| "loss": 1.2636, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.5823298606413205, | |
| "grad_norm": 0.27749454975128174, | |
| "learning_rate": 9.7333736691952e-05, | |
| "loss": 1.2857, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 0.5828710593965634, | |
| "grad_norm": 0.39380860328674316, | |
| "learning_rate": 9.732358408488602e-05, | |
| "loss": 1.2916, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 0.5834122581518062, | |
| "grad_norm": 0.2505074441432953, | |
| "learning_rate": 9.731341271650372e-05, | |
| "loss": 1.2548, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 0.5839534569070491, | |
| "grad_norm": 0.2549828588962555, | |
| "learning_rate": 9.730322259083751e-05, | |
| "loss": 1.2884, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 0.584494655662292, | |
| "grad_norm": 0.24714533984661102, | |
| "learning_rate": 9.729301371192724e-05, | |
| "loss": 1.2823, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.5850358544175348, | |
| "grad_norm": 0.24945247173309326, | |
| "learning_rate": 9.728278608382018e-05, | |
| "loss": 1.2976, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 0.5855770531727778, | |
| "grad_norm": 0.2512315511703491, | |
| "learning_rate": 9.727253971057109e-05, | |
| "loss": 1.2883, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 0.5861182519280206, | |
| "grad_norm": 0.2401745468378067, | |
| "learning_rate": 9.726227459624207e-05, | |
| "loss": 1.2637, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 0.5866594506832634, | |
| "grad_norm": 0.260251522064209, | |
| "learning_rate": 9.725199074490271e-05, | |
| "loss": 1.2618, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 0.5872006494385063, | |
| "grad_norm": 0.2533782124519348, | |
| "learning_rate": 9.724168816063004e-05, | |
| "loss": 1.2825, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.5877418481937492, | |
| "grad_norm": 0.2545458972454071, | |
| "learning_rate": 9.723136684750847e-05, | |
| "loss": 1.2784, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 0.588283046948992, | |
| "grad_norm": 0.24370916187763214, | |
| "learning_rate": 9.722102680962988e-05, | |
| "loss": 1.2601, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 0.5888242457042349, | |
| "grad_norm": 0.23707440495491028, | |
| "learning_rate": 9.721066805109353e-05, | |
| "loss": 1.2818, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 0.5893654444594777, | |
| "grad_norm": 0.22903890907764435, | |
| "learning_rate": 9.720029057600615e-05, | |
| "loss": 1.2686, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 0.5899066432147206, | |
| "grad_norm": 0.22820548713207245, | |
| "learning_rate": 9.718989438848182e-05, | |
| "loss": 1.2749, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.5904478419699635, | |
| "grad_norm": 0.2249859720468521, | |
| "learning_rate": 9.717947949264214e-05, | |
| "loss": 1.2649, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 0.5909890407252063, | |
| "grad_norm": 0.23568090796470642, | |
| "learning_rate": 9.716904589261602e-05, | |
| "loss": 1.2764, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 0.5915302394804492, | |
| "grad_norm": 0.24089080095291138, | |
| "learning_rate": 9.715859359253987e-05, | |
| "loss": 1.2801, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 0.5920714382356921, | |
| "grad_norm": 0.2259254902601242, | |
| "learning_rate": 9.714812259655746e-05, | |
| "loss": 1.2805, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 0.5926126369909349, | |
| "grad_norm": 0.23276519775390625, | |
| "learning_rate": 9.713763290881999e-05, | |
| "loss": 1.2635, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.5931538357461777, | |
| "grad_norm": 0.24884091317653656, | |
| "learning_rate": 9.712712453348607e-05, | |
| "loss": 1.2984, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 0.5936950345014207, | |
| "grad_norm": 0.23471422493457794, | |
| "learning_rate": 9.711659747472171e-05, | |
| "loss": 1.2742, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 0.5942362332566635, | |
| "grad_norm": 0.25790145993232727, | |
| "learning_rate": 9.710605173670037e-05, | |
| "loss": 1.2865, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 0.5947774320119064, | |
| "grad_norm": 0.24584504961967468, | |
| "learning_rate": 9.709548732360285e-05, | |
| "loss": 1.2826, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 0.5953186307671492, | |
| "grad_norm": 0.23682548105716705, | |
| "learning_rate": 9.708490423961741e-05, | |
| "loss": 1.2499, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.5958598295223921, | |
| "grad_norm": 0.24267072975635529, | |
| "learning_rate": 9.707430248893964e-05, | |
| "loss": 1.2514, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 0.596401028277635, | |
| "grad_norm": 0.2546815276145935, | |
| "learning_rate": 9.706368207577264e-05, | |
| "loss": 1.2755, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 0.5969422270328778, | |
| "grad_norm": 0.24322691559791565, | |
| "learning_rate": 9.70530430043268e-05, | |
| "loss": 1.2817, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 0.5974834257881206, | |
| "grad_norm": 0.22995691001415253, | |
| "learning_rate": 9.704238527882e-05, | |
| "loss": 1.2487, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 0.5980246245433636, | |
| "grad_norm": 0.25768396258354187, | |
| "learning_rate": 9.70317089034774e-05, | |
| "loss": 1.2956, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.5985658232986064, | |
| "grad_norm": 0.2691928744316101, | |
| "learning_rate": 9.702101388253167e-05, | |
| "loss": 1.2704, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 0.5991070220538492, | |
| "grad_norm": 0.2356652021408081, | |
| "learning_rate": 9.701030022022282e-05, | |
| "loss": 1.2548, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 0.5996482208090922, | |
| "grad_norm": 0.24094751477241516, | |
| "learning_rate": 9.699956792079825e-05, | |
| "loss": 1.2616, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 0.600189419564335, | |
| "grad_norm": 0.2429209202528, | |
| "learning_rate": 9.698881698851274e-05, | |
| "loss": 1.2603, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 0.6007306183195779, | |
| "grad_norm": 0.24263691902160645, | |
| "learning_rate": 9.69780474276285e-05, | |
| "loss": 1.2788, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.6012718170748207, | |
| "grad_norm": 0.25438740849494934, | |
| "learning_rate": 9.696725924241506e-05, | |
| "loss": 1.2823, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 0.6018130158300636, | |
| "grad_norm": 0.258472204208374, | |
| "learning_rate": 9.695645243714939e-05, | |
| "loss": 1.2673, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 0.6023542145853065, | |
| "grad_norm": 0.2571878433227539, | |
| "learning_rate": 9.694562701611583e-05, | |
| "loss": 1.295, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 0.6028954133405493, | |
| "grad_norm": 0.2430989295244217, | |
| "learning_rate": 9.693478298360607e-05, | |
| "loss": 1.2595, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 0.6034366120957921, | |
| "grad_norm": 0.23489908874034882, | |
| "learning_rate": 9.692392034391922e-05, | |
| "loss": 1.2773, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.6039778108510351, | |
| "grad_norm": 0.2507382035255432, | |
| "learning_rate": 9.691303910136171e-05, | |
| "loss": 1.2782, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 0.6045190096062779, | |
| "grad_norm": 0.23355726897716522, | |
| "learning_rate": 9.690213926024743e-05, | |
| "loss": 1.263, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 0.6050602083615207, | |
| "grad_norm": 0.2275291532278061, | |
| "learning_rate": 9.689122082489754e-05, | |
| "loss": 1.2677, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 0.6056014071167636, | |
| "grad_norm": 0.2314850389957428, | |
| "learning_rate": 9.688028379964068e-05, | |
| "loss": 1.2646, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 0.6061426058720065, | |
| "grad_norm": 0.24879969656467438, | |
| "learning_rate": 9.686932818881278e-05, | |
| "loss": 1.2704, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.6066838046272494, | |
| "grad_norm": 0.23156161606311798, | |
| "learning_rate": 9.685835399675717e-05, | |
| "loss": 1.2795, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 0.6072250033824922, | |
| "grad_norm": 0.22952421009540558, | |
| "learning_rate": 9.684736122782454e-05, | |
| "loss": 1.2597, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 0.6077662021377351, | |
| "grad_norm": 1.0009911060333252, | |
| "learning_rate": 9.683634988637293e-05, | |
| "loss": 1.2504, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 0.608307400892978, | |
| "grad_norm": 0.2649003267288208, | |
| "learning_rate": 9.682531997676777e-05, | |
| "loss": 1.2376, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 0.6088485996482208, | |
| "grad_norm": 0.31321823596954346, | |
| "learning_rate": 9.681427150338187e-05, | |
| "loss": 1.2607, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.6093897984034636, | |
| "grad_norm": 0.3142634332180023, | |
| "learning_rate": 9.680320447059532e-05, | |
| "loss": 1.261, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 0.6099309971587066, | |
| "grad_norm": 0.31247085332870483, | |
| "learning_rate": 9.679211888279565e-05, | |
| "loss": 1.2685, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 0.6104721959139494, | |
| "grad_norm": 0.25763556361198425, | |
| "learning_rate": 9.67810147443777e-05, | |
| "loss": 1.2542, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 0.6110133946691922, | |
| "grad_norm": 0.2788141667842865, | |
| "learning_rate": 9.676989205974367e-05, | |
| "loss": 1.2747, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 0.6115545934244351, | |
| "grad_norm": 0.26279813051223755, | |
| "learning_rate": 9.675875083330315e-05, | |
| "loss": 1.261, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.612095792179678, | |
| "grad_norm": 0.24764376878738403, | |
| "learning_rate": 9.674759106947302e-05, | |
| "loss": 1.2632, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 0.6126369909349209, | |
| "grad_norm": 0.2378121018409729, | |
| "learning_rate": 9.673641277267756e-05, | |
| "loss": 1.2569, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 0.6131781896901637, | |
| "grad_norm": 0.25457054376602173, | |
| "learning_rate": 9.672521594734838e-05, | |
| "loss": 1.2667, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 0.6137193884454065, | |
| "grad_norm": 0.2589806616306305, | |
| "learning_rate": 9.67140005979244e-05, | |
| "loss": 1.2515, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 0.6142605872006495, | |
| "grad_norm": 0.23375307023525238, | |
| "learning_rate": 9.670276672885195e-05, | |
| "loss": 1.2608, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.6148017859558923, | |
| "grad_norm": 0.22978229820728302, | |
| "learning_rate": 9.669151434458468e-05, | |
| "loss": 1.2516, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 0.6153429847111351, | |
| "grad_norm": 0.22958585619926453, | |
| "learning_rate": 9.668024344958353e-05, | |
| "loss": 1.2617, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 0.6158841834663781, | |
| "grad_norm": 0.22783328592777252, | |
| "learning_rate": 9.666895404831685e-05, | |
| "loss": 1.2732, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 0.6164253822216209, | |
| "grad_norm": 0.2413301318883896, | |
| "learning_rate": 9.665764614526027e-05, | |
| "loss": 1.2501, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 0.6169665809768637, | |
| "grad_norm": 0.2591399550437927, | |
| "learning_rate": 9.66463197448968e-05, | |
| "loss": 1.2657, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.6175077797321066, | |
| "grad_norm": 0.23001301288604736, | |
| "learning_rate": 9.663497485171675e-05, | |
| "loss": 1.2698, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 0.6180489784873495, | |
| "grad_norm": 0.21373826265335083, | |
| "learning_rate": 9.662361147021779e-05, | |
| "loss": 1.2651, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 0.6185901772425924, | |
| "grad_norm": 0.2302403599023819, | |
| "learning_rate": 9.66122296049049e-05, | |
| "loss": 1.2786, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 0.6191313759978352, | |
| "grad_norm": 0.24121953547000885, | |
| "learning_rate": 9.660082926029038e-05, | |
| "loss": 1.2639, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 0.619672574753078, | |
| "grad_norm": 0.22190925478935242, | |
| "learning_rate": 9.658941044089387e-05, | |
| "loss": 1.2472, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.620213773508321, | |
| "grad_norm": 0.22907890379428864, | |
| "learning_rate": 9.657797315124234e-05, | |
| "loss": 1.2475, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 0.6207549722635638, | |
| "grad_norm": 0.23821642994880676, | |
| "learning_rate": 9.656651739587008e-05, | |
| "loss": 1.2689, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 0.6212961710188066, | |
| "grad_norm": 0.25953301787376404, | |
| "learning_rate": 9.655504317931867e-05, | |
| "loss": 1.2587, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 0.6218373697740495, | |
| "grad_norm": 0.24054677784442902, | |
| "learning_rate": 9.654355050613705e-05, | |
| "loss": 1.2541, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 0.6223785685292924, | |
| "grad_norm": 0.23474815487861633, | |
| "learning_rate": 9.65320393808815e-05, | |
| "loss": 1.2605, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.6229197672845352, | |
| "grad_norm": 0.23981337249279022, | |
| "learning_rate": 9.652050980811551e-05, | |
| "loss": 1.267, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 0.6234609660397781, | |
| "grad_norm": 0.21820946037769318, | |
| "learning_rate": 9.650896179240997e-05, | |
| "loss": 1.2555, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 0.624002164795021, | |
| "grad_norm": 0.2165161520242691, | |
| "learning_rate": 9.64973953383431e-05, | |
| "loss": 1.271, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 0.6245433635502639, | |
| "grad_norm": 0.22105515003204346, | |
| "learning_rate": 9.648581045050035e-05, | |
| "loss": 1.2663, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 0.6250845623055067, | |
| "grad_norm": 0.22626088559627533, | |
| "learning_rate": 9.647420713347454e-05, | |
| "loss": 1.2645, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.6256257610607495, | |
| "grad_norm": 0.2347354292869568, | |
| "learning_rate": 9.646258539186577e-05, | |
| "loss": 1.2372, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 0.6261669598159925, | |
| "grad_norm": 0.2388308346271515, | |
| "learning_rate": 9.645094523028144e-05, | |
| "loss": 1.2652, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 0.6267081585712353, | |
| "grad_norm": 0.2252940982580185, | |
| "learning_rate": 9.643928665333628e-05, | |
| "loss": 1.2595, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 0.6272493573264781, | |
| "grad_norm": 0.24020199477672577, | |
| "learning_rate": 9.64276096656523e-05, | |
| "loss": 1.3079, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 0.627790556081721, | |
| "grad_norm": 0.23432402312755585, | |
| "learning_rate": 9.64159142718588e-05, | |
| "loss": 1.2718, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.6283317548369639, | |
| "grad_norm": 0.22962002456188202, | |
| "learning_rate": 9.640420047659239e-05, | |
| "loss": 1.2606, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 0.6288729535922067, | |
| "grad_norm": 0.21251855790615082, | |
| "learning_rate": 9.6392468284497e-05, | |
| "loss": 1.2568, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 0.6294141523474496, | |
| "grad_norm": 0.2140374481678009, | |
| "learning_rate": 9.63807177002238e-05, | |
| "loss": 1.276, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 0.6299553511026925, | |
| "grad_norm": 0.21366523206233978, | |
| "learning_rate": 9.636894872843132e-05, | |
| "loss": 1.2521, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 0.6304965498579354, | |
| "grad_norm": 0.22407646477222443, | |
| "learning_rate": 9.635716137378528e-05, | |
| "loss": 1.2692, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.6310377486131782, | |
| "grad_norm": 0.24414391815662384, | |
| "learning_rate": 9.63453556409588e-05, | |
| "loss": 1.2554, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 0.631578947368421, | |
| "grad_norm": 0.23787756264209747, | |
| "learning_rate": 9.633353153463219e-05, | |
| "loss": 1.2471, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 0.632120146123664, | |
| "grad_norm": 0.2248927801847458, | |
| "learning_rate": 9.63216890594931e-05, | |
| "loss": 1.2586, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 0.6326613448789068, | |
| "grad_norm": 0.2224208265542984, | |
| "learning_rate": 9.630982822023648e-05, | |
| "loss": 1.293, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 0.6332025436341496, | |
| "grad_norm": 0.2312854379415512, | |
| "learning_rate": 9.62979490215645e-05, | |
| "loss": 1.2604, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.6337437423893925, | |
| "grad_norm": 0.22154025733470917, | |
| "learning_rate": 9.628605146818665e-05, | |
| "loss": 1.2645, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 0.6342849411446354, | |
| "grad_norm": 0.2136823982000351, | |
| "learning_rate": 9.627413556481968e-05, | |
| "loss": 1.2375, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 0.6348261398998782, | |
| "grad_norm": 0.21541404724121094, | |
| "learning_rate": 9.626220131618763e-05, | |
| "loss": 1.2771, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 0.6353673386551211, | |
| "grad_norm": 0.22025029361248016, | |
| "learning_rate": 9.625024872702178e-05, | |
| "loss": 1.261, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 0.6359085374103639, | |
| "grad_norm": 0.2375534474849701, | |
| "learning_rate": 9.623827780206073e-05, | |
| "loss": 1.2808, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.6364497361656068, | |
| "grad_norm": 0.23297767341136932, | |
| "learning_rate": 9.62262885460503e-05, | |
| "loss": 1.2697, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 0.6369909349208497, | |
| "grad_norm": 0.24082797765731812, | |
| "learning_rate": 9.621428096374363e-05, | |
| "loss": 1.2347, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 0.6375321336760925, | |
| "grad_norm": 0.22009813785552979, | |
| "learning_rate": 9.620225505990105e-05, | |
| "loss": 1.2631, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 0.6380733324313355, | |
| "grad_norm": 0.22501374781131744, | |
| "learning_rate": 9.619021083929025e-05, | |
| "loss": 1.2563, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 0.6386145311865783, | |
| "grad_norm": 0.22494594752788544, | |
| "learning_rate": 9.61781483066861e-05, | |
| "loss": 1.2532, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.6391557299418211, | |
| "grad_norm": 0.3569008409976959, | |
| "learning_rate": 9.616606746687078e-05, | |
| "loss": 1.2684, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 0.639696928697064, | |
| "grad_norm": 207.0965576171875, | |
| "learning_rate": 9.61539683246337e-05, | |
| "loss": 1.3637, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 0.6402381274523069, | |
| "grad_norm": 0.4599202573299408, | |
| "learning_rate": 9.614185088477152e-05, | |
| "loss": 1.292, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 0.6407793262075497, | |
| "grad_norm": 0.3244802951812744, | |
| "learning_rate": 9.61297151520882e-05, | |
| "loss": 1.2585, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 0.6413205249627926, | |
| "grad_norm": 0.30332016944885254, | |
| "learning_rate": 9.611756113139488e-05, | |
| "loss": 1.2619, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.6418617237180354, | |
| "grad_norm": 0.2982909083366394, | |
| "learning_rate": 9.610538882751001e-05, | |
| "loss": 1.2637, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 0.6424029224732783, | |
| "grad_norm": 5.417288303375244, | |
| "learning_rate": 9.609319824525928e-05, | |
| "loss": 1.2713, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 0.6429441212285212, | |
| "grad_norm": 0.4198252260684967, | |
| "learning_rate": 9.608098938947562e-05, | |
| "loss": 1.2541, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 0.643485319983764, | |
| "grad_norm": 0.8178582191467285, | |
| "learning_rate": 9.606876226499918e-05, | |
| "loss": 1.2884, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 0.6440265187390068, | |
| "grad_norm": 0.33514025807380676, | |
| "learning_rate": 9.60565168766774e-05, | |
| "loss": 1.2719, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.6445677174942498, | |
| "grad_norm": 0.2973354756832123, | |
| "learning_rate": 9.60442532293649e-05, | |
| "loss": 1.2515, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 0.6451089162494926, | |
| "grad_norm": 0.4670213758945465, | |
| "learning_rate": 9.603197132792359e-05, | |
| "loss": 1.2665, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 0.6456501150047355, | |
| "grad_norm": 0.3197322189807892, | |
| "learning_rate": 9.60196711772226e-05, | |
| "loss": 1.2574, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 0.6461913137599784, | |
| "grad_norm": 1.1344069242477417, | |
| "learning_rate": 9.600735278213828e-05, | |
| "loss": 1.2689, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 0.6467325125152212, | |
| "grad_norm": 0.5379347801208496, | |
| "learning_rate": 9.599501614755425e-05, | |
| "loss": 1.249, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.6472737112704641, | |
| "grad_norm": 0.33201339840888977, | |
| "learning_rate": 9.598266127836131e-05, | |
| "loss": 1.2729, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 0.6478149100257069, | |
| "grad_norm": 8.969808578491211, | |
| "learning_rate": 9.597028817945753e-05, | |
| "loss": 1.2768, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 0.6483561087809498, | |
| "grad_norm": 0.3650411069393158, | |
| "learning_rate": 9.595789685574821e-05, | |
| "loss": 1.2511, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 0.6488973075361927, | |
| "grad_norm": 0.8414996862411499, | |
| "learning_rate": 9.594548731214583e-05, | |
| "loss": 1.2707, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 0.6494385062914355, | |
| "grad_norm": 0.5362874269485474, | |
| "learning_rate": 9.593305955357016e-05, | |
| "loss": 1.2453, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.6499797050466783, | |
| "grad_norm": 0.40546804666519165, | |
| "learning_rate": 9.592061358494813e-05, | |
| "loss": 1.2665, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 0.6505209038019213, | |
| "grad_norm": 0.29758453369140625, | |
| "learning_rate": 9.590814941121389e-05, | |
| "loss": 1.2538, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 0.6510621025571641, | |
| "grad_norm": 0.2636415660381317, | |
| "learning_rate": 9.589566703730888e-05, | |
| "loss": 1.2457, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 0.651603301312407, | |
| "grad_norm": 0.2844487130641937, | |
| "learning_rate": 9.588316646818168e-05, | |
| "loss": 1.257, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 0.6521445000676498, | |
| "grad_norm": 0.2777060568332672, | |
| "learning_rate": 9.587064770878808e-05, | |
| "loss": 1.2506, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.6526856988228927, | |
| "grad_norm": 0.2585492730140686, | |
| "learning_rate": 9.585811076409117e-05, | |
| "loss": 1.2472, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 0.6532268975781356, | |
| "grad_norm": 0.24312525987625122, | |
| "learning_rate": 9.584555563906116e-05, | |
| "loss": 1.2703, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 0.6537680963333784, | |
| "grad_norm": 0.2286798357963562, | |
| "learning_rate": 9.583298233867549e-05, | |
| "loss": 1.2582, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 0.6543092950886213, | |
| "grad_norm": 0.22804994881153107, | |
| "learning_rate": 9.582039086791883e-05, | |
| "loss": 1.2538, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 0.6548504938438642, | |
| "grad_norm": 0.2244635969400406, | |
| "learning_rate": 9.580778123178303e-05, | |
| "loss": 1.2481, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.655391692599107, | |
| "grad_norm": 0.22303158044815063, | |
| "learning_rate": 9.579515343526714e-05, | |
| "loss": 1.2574, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 0.6559328913543498, | |
| "grad_norm": 0.2208811491727829, | |
| "learning_rate": 9.578250748337742e-05, | |
| "loss": 1.2579, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 0.6564740901095928, | |
| "grad_norm": 0.20853403210639954, | |
| "learning_rate": 9.576984338112736e-05, | |
| "loss": 1.2619, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 0.6570152888648356, | |
| "grad_norm": 0.20974035561084747, | |
| "learning_rate": 9.575716113353757e-05, | |
| "loss": 1.2605, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 0.6575564876200785, | |
| "grad_norm": 0.22891463339328766, | |
| "learning_rate": 9.57444607456359e-05, | |
| "loss": 1.2586, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.6580976863753213, | |
| "grad_norm": 0.21693287789821625, | |
| "learning_rate": 9.57317422224574e-05, | |
| "loss": 1.2505, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 0.6586388851305642, | |
| "grad_norm": 0.21806494891643524, | |
| "learning_rate": 9.57190055690443e-05, | |
| "loss": 1.261, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 0.6591800838858071, | |
| "grad_norm": 0.24015147984027863, | |
| "learning_rate": 9.570625079044601e-05, | |
| "loss": 1.2564, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 0.6597212826410499, | |
| "grad_norm": 0.26577669382095337, | |
| "learning_rate": 9.569347789171912e-05, | |
| "loss": 1.2716, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 0.6602624813962927, | |
| "grad_norm": 0.2382255643606186, | |
| "learning_rate": 9.568068687792741e-05, | |
| "loss": 1.2465, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.6608036801515357, | |
| "grad_norm": 0.22770415246486664, | |
| "learning_rate": 9.566787775414188e-05, | |
| "loss": 1.2229, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 0.6613448789067785, | |
| "grad_norm": 0.23449081182479858, | |
| "learning_rate": 9.565505052544065e-05, | |
| "loss": 1.2582, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 0.6618860776620213, | |
| "grad_norm": 0.22105945646762848, | |
| "learning_rate": 9.564220519690903e-05, | |
| "loss": 1.2505, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 0.6624272764172643, | |
| "grad_norm": 0.22349369525909424, | |
| "learning_rate": 9.562934177363953e-05, | |
| "loss": 1.2578, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 0.6629684751725071, | |
| "grad_norm": 0.23770608007907867, | |
| "learning_rate": 9.561646026073184e-05, | |
| "loss": 1.2399, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.66350967392775, | |
| "grad_norm": 0.2204604148864746, | |
| "learning_rate": 9.56035606632928e-05, | |
| "loss": 1.2512, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 0.6640508726829928, | |
| "grad_norm": 0.2204030454158783, | |
| "learning_rate": 9.559064298643638e-05, | |
| "loss": 1.2821, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 0.6645920714382357, | |
| "grad_norm": 0.2169465720653534, | |
| "learning_rate": 9.55777072352838e-05, | |
| "loss": 1.2529, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 0.6651332701934786, | |
| "grad_norm": 0.2273695021867752, | |
| "learning_rate": 9.55647534149634e-05, | |
| "loss": 1.2497, | |
| "step": 2458 | |
| }, | |
| { | |
| "epoch": 0.6656744689487214, | |
| "grad_norm": 0.22077496349811554, | |
| "learning_rate": 9.555178153061069e-05, | |
| "loss": 1.2433, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.6662156677039642, | |
| "grad_norm": 0.2203417718410492, | |
| "learning_rate": 9.553879158736833e-05, | |
| "loss": 1.2464, | |
| "step": 2462 | |
| }, | |
| { | |
| "epoch": 0.6667568664592072, | |
| "grad_norm": 0.22205059230327606, | |
| "learning_rate": 9.552578359038617e-05, | |
| "loss": 1.2611, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 0.66729806521445, | |
| "grad_norm": 0.2206515222787857, | |
| "learning_rate": 9.551275754482119e-05, | |
| "loss": 1.2624, | |
| "step": 2466 | |
| }, | |
| { | |
| "epoch": 0.6678392639696928, | |
| "grad_norm": 0.21758343279361725, | |
| "learning_rate": 9.549971345583753e-05, | |
| "loss": 1.2406, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 0.6683804627249358, | |
| "grad_norm": 0.21517138183116913, | |
| "learning_rate": 9.548665132860647e-05, | |
| "loss": 1.2538, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.6689216614801786, | |
| "grad_norm": 0.21490350365638733, | |
| "learning_rate": 9.547357116830648e-05, | |
| "loss": 1.2534, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 0.6694628602354215, | |
| "grad_norm": 0.2156359702348709, | |
| "learning_rate": 9.546047298012315e-05, | |
| "loss": 1.2459, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 0.6700040589906643, | |
| "grad_norm": 0.2196791172027588, | |
| "learning_rate": 9.544735676924923e-05, | |
| "loss": 1.2534, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 0.6705452577459072, | |
| "grad_norm": 0.22666549682617188, | |
| "learning_rate": 9.54342225408846e-05, | |
| "loss": 1.252, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 0.6710864565011501, | |
| "grad_norm": 0.2314993143081665, | |
| "learning_rate": 9.54210703002363e-05, | |
| "loss": 1.2478, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.6716276552563929, | |
| "grad_norm": 0.2225077599287033, | |
| "learning_rate": 9.54079000525185e-05, | |
| "loss": 1.2465, | |
| "step": 2482 | |
| }, | |
| { | |
| "epoch": 0.6721688540116357, | |
| "grad_norm": 0.22268906235694885, | |
| "learning_rate": 9.539471180295249e-05, | |
| "loss": 1.2453, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 0.6727100527668787, | |
| "grad_norm": 0.30744513869285583, | |
| "learning_rate": 9.538150555676677e-05, | |
| "loss": 1.2874, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 0.6732512515221215, | |
| "grad_norm": 0.27435171604156494, | |
| "learning_rate": 9.536828131919686e-05, | |
| "loss": 1.2533, | |
| "step": 2488 | |
| }, | |
| { | |
| "epoch": 0.6737924502773643, | |
| "grad_norm": 0.5657795667648315, | |
| "learning_rate": 9.535503909548553e-05, | |
| "loss": 1.2567, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.6743336490326072, | |
| "grad_norm": 0.4795803129673004, | |
| "learning_rate": 9.53417788908826e-05, | |
| "loss": 1.2563, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 0.6748748477878501, | |
| "grad_norm": 0.3125123977661133, | |
| "learning_rate": 9.532850071064503e-05, | |
| "loss": 1.251, | |
| "step": 2494 | |
| }, | |
| { | |
| "epoch": 0.675416046543093, | |
| "grad_norm": 0.2949443459510803, | |
| "learning_rate": 9.531520456003696e-05, | |
| "loss": 1.2491, | |
| "step": 2496 | |
| }, | |
| { | |
| "epoch": 0.6759572452983358, | |
| "grad_norm": 0.289389967918396, | |
| "learning_rate": 9.530189044432959e-05, | |
| "loss": 1.2571, | |
| "step": 2498 | |
| }, | |
| { | |
| "epoch": 0.6764984440535787, | |
| "grad_norm": 0.24411126971244812, | |
| "learning_rate": 9.528855836880127e-05, | |
| "loss": 1.2528, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.6770396428088216, | |
| "grad_norm": 0.38176965713500977, | |
| "learning_rate": 9.527520833873748e-05, | |
| "loss": 1.2462, | |
| "step": 2502 | |
| }, | |
| { | |
| "epoch": 0.6775808415640644, | |
| "grad_norm": 0.25295090675354004, | |
| "learning_rate": 9.52618403594308e-05, | |
| "loss": 1.2601, | |
| "step": 2504 | |
| }, | |
| { | |
| "epoch": 0.6781220403193072, | |
| "grad_norm": 0.24630951881408691, | |
| "learning_rate": 9.524845443618091e-05, | |
| "loss": 1.2398, | |
| "step": 2506 | |
| }, | |
| { | |
| "epoch": 0.6786632390745502, | |
| "grad_norm": 0.25156068801879883, | |
| "learning_rate": 9.523505057429466e-05, | |
| "loss": 1.2429, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 0.679204437829793, | |
| "grad_norm": 0.23003700375556946, | |
| "learning_rate": 9.522162877908596e-05, | |
| "loss": 1.2569, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.6797456365850358, | |
| "grad_norm": 0.2248392552137375, | |
| "learning_rate": 9.520818905587585e-05, | |
| "loss": 1.2506, | |
| "step": 2512 | |
| }, | |
| { | |
| "epoch": 0.6802868353402787, | |
| "grad_norm": 0.22383219003677368, | |
| "learning_rate": 9.519473140999246e-05, | |
| "loss": 1.2294, | |
| "step": 2514 | |
| }, | |
| { | |
| "epoch": 0.6808280340955216, | |
| "grad_norm": 0.22723117470741272, | |
| "learning_rate": 9.518125584677106e-05, | |
| "loss": 1.2658, | |
| "step": 2516 | |
| }, | |
| { | |
| "epoch": 0.6813692328507645, | |
| "grad_norm": 0.24425800144672394, | |
| "learning_rate": 9.516776237155402e-05, | |
| "loss": 1.233, | |
| "step": 2518 | |
| }, | |
| { | |
| "epoch": 0.6819104316060073, | |
| "grad_norm": 0.22345170378684998, | |
| "learning_rate": 9.515425098969075e-05, | |
| "loss": 1.248, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.6824516303612501, | |
| "grad_norm": 0.21297229826450348, | |
| "learning_rate": 9.514072170653782e-05, | |
| "loss": 1.2453, | |
| "step": 2522 | |
| }, | |
| { | |
| "epoch": 0.6829928291164931, | |
| "grad_norm": 0.21216444671154022, | |
| "learning_rate": 9.51271745274589e-05, | |
| "loss": 1.2473, | |
| "step": 2524 | |
| }, | |
| { | |
| "epoch": 0.6835340278717359, | |
| "grad_norm": 0.2091735154390335, | |
| "learning_rate": 9.511360945782472e-05, | |
| "loss": 1.2451, | |
| "step": 2526 | |
| }, | |
| { | |
| "epoch": 0.6840752266269787, | |
| "grad_norm": 0.21291106939315796, | |
| "learning_rate": 9.510002650301313e-05, | |
| "loss": 1.2772, | |
| "step": 2528 | |
| }, | |
| { | |
| "epoch": 0.6846164253822217, | |
| "grad_norm": 0.21953986585140228, | |
| "learning_rate": 9.508642566840901e-05, | |
| "loss": 1.2533, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.6851576241374645, | |
| "grad_norm": 0.21948380768299103, | |
| "learning_rate": 9.507280695940446e-05, | |
| "loss": 1.2797, | |
| "step": 2532 | |
| }, | |
| { | |
| "epoch": 0.6856988228927073, | |
| "grad_norm": 0.21971148252487183, | |
| "learning_rate": 9.505917038139851e-05, | |
| "loss": 1.2609, | |
| "step": 2534 | |
| }, | |
| { | |
| "epoch": 0.6862400216479502, | |
| "grad_norm": 0.21478046476840973, | |
| "learning_rate": 9.504551593979738e-05, | |
| "loss": 1.2625, | |
| "step": 2536 | |
| }, | |
| { | |
| "epoch": 0.6867812204031931, | |
| "grad_norm": 0.21927322447299957, | |
| "learning_rate": 9.503184364001431e-05, | |
| "loss": 1.2415, | |
| "step": 2538 | |
| }, | |
| { | |
| "epoch": 0.687322419158436, | |
| "grad_norm": 0.2084941267967224, | |
| "learning_rate": 9.501815348746971e-05, | |
| "loss": 1.2455, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.6878636179136788, | |
| "grad_norm": 0.20336540043354034, | |
| "learning_rate": 9.500444548759095e-05, | |
| "loss": 1.2505, | |
| "step": 2542 | |
| }, | |
| { | |
| "epoch": 0.6884048166689216, | |
| "grad_norm": 0.21661430597305298, | |
| "learning_rate": 9.499071964581256e-05, | |
| "loss": 1.235, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 0.6889460154241646, | |
| "grad_norm": 0.2240605354309082, | |
| "learning_rate": 9.497697596757609e-05, | |
| "loss": 1.2546, | |
| "step": 2546 | |
| }, | |
| { | |
| "epoch": 0.6894872141794074, | |
| "grad_norm": 0.2289547622203827, | |
| "learning_rate": 9.496321445833022e-05, | |
| "loss": 1.2387, | |
| "step": 2548 | |
| }, | |
| { | |
| "epoch": 0.6900284129346502, | |
| "grad_norm": 0.22886811196804047, | |
| "learning_rate": 9.494943512353063e-05, | |
| "loss": 1.2531, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.6905696116898931, | |
| "grad_norm": 0.2151922732591629, | |
| "learning_rate": 9.493563796864014e-05, | |
| "loss": 1.2447, | |
| "step": 2552 | |
| }, | |
| { | |
| "epoch": 0.691110810445136, | |
| "grad_norm": 0.2263440489768982, | |
| "learning_rate": 9.492182299912857e-05, | |
| "loss": 1.245, | |
| "step": 2554 | |
| }, | |
| { | |
| "epoch": 0.6916520092003788, | |
| "grad_norm": 0.23101641237735748, | |
| "learning_rate": 9.490799022047286e-05, | |
| "loss": 1.2253, | |
| "step": 2556 | |
| }, | |
| { | |
| "epoch": 0.6921932079556217, | |
| "grad_norm": 0.2258201241493225, | |
| "learning_rate": 9.489413963815694e-05, | |
| "loss": 1.2477, | |
| "step": 2558 | |
| }, | |
| { | |
| "epoch": 0.6927344067108646, | |
| "grad_norm": 0.2227460741996765, | |
| "learning_rate": 9.488027125767187e-05, | |
| "loss": 1.2215, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.6932756054661074, | |
| "grad_norm": 0.2213139533996582, | |
| "learning_rate": 9.48663850845157e-05, | |
| "loss": 1.2308, | |
| "step": 2562 | |
| }, | |
| { | |
| "epoch": 0.6938168042213503, | |
| "grad_norm": 0.22192241251468658, | |
| "learning_rate": 9.485248112419363e-05, | |
| "loss": 1.2487, | |
| "step": 2564 | |
| }, | |
| { | |
| "epoch": 0.6943580029765931, | |
| "grad_norm": 0.21532469987869263, | |
| "learning_rate": 9.483855938221777e-05, | |
| "loss": 1.2498, | |
| "step": 2566 | |
| }, | |
| { | |
| "epoch": 0.6948992017318361, | |
| "grad_norm": 0.21143551170825958, | |
| "learning_rate": 9.482461986410743e-05, | |
| "loss": 1.2453, | |
| "step": 2568 | |
| }, | |
| { | |
| "epoch": 0.6954404004870789, | |
| "grad_norm": 0.21282954514026642, | |
| "learning_rate": 9.481066257538886e-05, | |
| "loss": 1.2499, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.6959815992423217, | |
| "grad_norm": 0.219988152384758, | |
| "learning_rate": 9.47966875215954e-05, | |
| "loss": 1.2478, | |
| "step": 2572 | |
| }, | |
| { | |
| "epoch": 0.6965227979975646, | |
| "grad_norm": 0.21327020227909088, | |
| "learning_rate": 9.478269470826744e-05, | |
| "loss": 1.2364, | |
| "step": 2574 | |
| }, | |
| { | |
| "epoch": 0.6970639967528075, | |
| "grad_norm": 0.2091750204563141, | |
| "learning_rate": 9.476868414095237e-05, | |
| "loss": 1.2494, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 0.6976051955080503, | |
| "grad_norm": 0.2145649939775467, | |
| "learning_rate": 9.475465582520466e-05, | |
| "loss": 1.254, | |
| "step": 2578 | |
| }, | |
| { | |
| "epoch": 0.6981463942632932, | |
| "grad_norm": 0.21477670967578888, | |
| "learning_rate": 9.474060976658578e-05, | |
| "loss": 1.2678, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.6986875930185361, | |
| "grad_norm": 0.21862445771694183, | |
| "learning_rate": 9.472654597066431e-05, | |
| "loss": 1.2512, | |
| "step": 2582 | |
| }, | |
| { | |
| "epoch": 0.699228791773779, | |
| "grad_norm": 0.21111270785331726, | |
| "learning_rate": 9.471246444301574e-05, | |
| "loss": 1.2587, | |
| "step": 2584 | |
| }, | |
| { | |
| "epoch": 0.6997699905290218, | |
| "grad_norm": 0.21332062780857086, | |
| "learning_rate": 9.469836518922269e-05, | |
| "loss": 1.2569, | |
| "step": 2586 | |
| }, | |
| { | |
| "epoch": 0.7003111892842646, | |
| "grad_norm": 0.21386279165744781, | |
| "learning_rate": 9.468424821487476e-05, | |
| "loss": 1.2308, | |
| "step": 2588 | |
| }, | |
| { | |
| "epoch": 0.7008523880395076, | |
| "grad_norm": 0.20638014376163483, | |
| "learning_rate": 9.46701135255686e-05, | |
| "loss": 1.2453, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.7013935867947504, | |
| "grad_norm": 0.2437312752008438, | |
| "learning_rate": 9.465596112690787e-05, | |
| "loss": 1.2523, | |
| "step": 2592 | |
| }, | |
| { | |
| "epoch": 0.7019347855499932, | |
| "grad_norm": 0.22395059466362, | |
| "learning_rate": 9.464179102450325e-05, | |
| "loss": 1.2535, | |
| "step": 2594 | |
| }, | |
| { | |
| "epoch": 0.7024759843052361, | |
| "grad_norm": 0.22118812799453735, | |
| "learning_rate": 9.462760322397246e-05, | |
| "loss": 1.2488, | |
| "step": 2596 | |
| }, | |
| { | |
| "epoch": 0.703017183060479, | |
| "grad_norm": 0.22880488634109497, | |
| "learning_rate": 9.461339773094021e-05, | |
| "loss": 1.2407, | |
| "step": 2598 | |
| }, | |
| { | |
| "epoch": 0.7035583818157218, | |
| "grad_norm": 0.21199798583984375, | |
| "learning_rate": 9.45991745510382e-05, | |
| "loss": 1.2476, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.7040995805709647, | |
| "grad_norm": 0.20646455883979797, | |
| "learning_rate": 9.458493368990519e-05, | |
| "loss": 1.2556, | |
| "step": 2602 | |
| }, | |
| { | |
| "epoch": 0.7046407793262075, | |
| "grad_norm": 0.2136593908071518, | |
| "learning_rate": 9.457067515318698e-05, | |
| "loss": 1.2567, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 0.7051819780814504, | |
| "grad_norm": 0.214664489030838, | |
| "learning_rate": 9.455639894653627e-05, | |
| "loss": 1.266, | |
| "step": 2606 | |
| }, | |
| { | |
| "epoch": 0.7057231768366933, | |
| "grad_norm": 0.2101629078388214, | |
| "learning_rate": 9.454210507561285e-05, | |
| "loss": 1.2499, | |
| "step": 2608 | |
| }, | |
| { | |
| "epoch": 0.7062643755919361, | |
| "grad_norm": 0.2157791256904602, | |
| "learning_rate": 9.452779354608348e-05, | |
| "loss": 1.2421, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.706805574347179, | |
| "grad_norm": 0.20827960968017578, | |
| "learning_rate": 9.451346436362196e-05, | |
| "loss": 1.2566, | |
| "step": 2612 | |
| }, | |
| { | |
| "epoch": 0.7073467731024219, | |
| "grad_norm": 0.21283753216266632, | |
| "learning_rate": 9.449911753390901e-05, | |
| "loss": 1.2561, | |
| "step": 2614 | |
| }, | |
| { | |
| "epoch": 0.7078879718576647, | |
| "grad_norm": 0.22358572483062744, | |
| "learning_rate": 9.448475306263245e-05, | |
| "loss": 1.2418, | |
| "step": 2616 | |
| }, | |
| { | |
| "epoch": 0.7084291706129076, | |
| "grad_norm": 0.21198727190494537, | |
| "learning_rate": 9.4470370955487e-05, | |
| "loss": 1.2511, | |
| "step": 2618 | |
| }, | |
| { | |
| "epoch": 0.7089703693681505, | |
| "grad_norm": 0.21495653688907623, | |
| "learning_rate": 9.445597121817442e-05, | |
| "loss": 1.2294, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.7095115681233933, | |
| "grad_norm": 0.21378777921199799, | |
| "learning_rate": 9.444155385640345e-05, | |
| "loss": 1.2375, | |
| "step": 2622 | |
| }, | |
| { | |
| "epoch": 0.7100527668786362, | |
| "grad_norm": 0.21197205781936646, | |
| "learning_rate": 9.442711887588981e-05, | |
| "loss": 1.251, | |
| "step": 2624 | |
| }, | |
| { | |
| "epoch": 0.710593965633879, | |
| "grad_norm": 0.21979504823684692, | |
| "learning_rate": 9.441266628235624e-05, | |
| "loss": 1.2467, | |
| "step": 2626 | |
| }, | |
| { | |
| "epoch": 0.7111351643891219, | |
| "grad_norm": 0.21565599739551544, | |
| "learning_rate": 9.43981960815324e-05, | |
| "loss": 1.22, | |
| "step": 2628 | |
| }, | |
| { | |
| "epoch": 0.7116763631443648, | |
| "grad_norm": 0.19891119003295898, | |
| "learning_rate": 9.438370827915499e-05, | |
| "loss": 1.215, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.7122175618996076, | |
| "grad_norm": 0.21079830825328827, | |
| "learning_rate": 9.436920288096764e-05, | |
| "loss": 1.2407, | |
| "step": 2632 | |
| }, | |
| { | |
| "epoch": 0.7127587606548504, | |
| "grad_norm": 0.21531549096107483, | |
| "learning_rate": 9.435467989272099e-05, | |
| "loss": 1.2348, | |
| "step": 2634 | |
| }, | |
| { | |
| "epoch": 0.7132999594100934, | |
| "grad_norm": 0.22583681344985962, | |
| "learning_rate": 9.434013932017265e-05, | |
| "loss": 1.2567, | |
| "step": 2636 | |
| }, | |
| { | |
| "epoch": 0.7138411581653362, | |
| "grad_norm": 0.24707137048244476, | |
| "learning_rate": 9.432558116908718e-05, | |
| "loss": 1.244, | |
| "step": 2638 | |
| }, | |
| { | |
| "epoch": 0.714382356920579, | |
| "grad_norm": 0.23890820145606995, | |
| "learning_rate": 9.431100544523614e-05, | |
| "loss": 1.2361, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.714923555675822, | |
| "grad_norm": 0.2275097668170929, | |
| "learning_rate": 9.429641215439802e-05, | |
| "loss": 1.2337, | |
| "step": 2642 | |
| }, | |
| { | |
| "epoch": 0.7154647544310648, | |
| "grad_norm": 0.22068314254283905, | |
| "learning_rate": 9.42818013023583e-05, | |
| "loss": 1.246, | |
| "step": 2644 | |
| }, | |
| { | |
| "epoch": 0.7160059531863077, | |
| "grad_norm": 0.22214053571224213, | |
| "learning_rate": 9.426717289490943e-05, | |
| "loss": 1.2507, | |
| "step": 2646 | |
| }, | |
| { | |
| "epoch": 0.7165471519415505, | |
| "grad_norm": 0.21483547985553741, | |
| "learning_rate": 9.425252693785078e-05, | |
| "loss": 1.2223, | |
| "step": 2648 | |
| }, | |
| { | |
| "epoch": 0.7170883506967934, | |
| "grad_norm": 0.21457841992378235, | |
| "learning_rate": 9.423786343698872e-05, | |
| "loss": 1.2494, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.7176295494520363, | |
| "grad_norm": 0.20471327006816864, | |
| "learning_rate": 9.422318239813656e-05, | |
| "loss": 1.2426, | |
| "step": 2652 | |
| }, | |
| { | |
| "epoch": 0.7181707482072791, | |
| "grad_norm": 0.20799721777439117, | |
| "learning_rate": 9.420848382711455e-05, | |
| "loss": 1.2409, | |
| "step": 2654 | |
| }, | |
| { | |
| "epoch": 0.7187119469625219, | |
| "grad_norm": 0.2095753401517868, | |
| "learning_rate": 9.41937677297499e-05, | |
| "loss": 1.2349, | |
| "step": 2656 | |
| }, | |
| { | |
| "epoch": 0.7192531457177649, | |
| "grad_norm": 0.2103864848613739, | |
| "learning_rate": 9.417903411187678e-05, | |
| "loss": 1.2432, | |
| "step": 2658 | |
| }, | |
| { | |
| "epoch": 0.7197943444730077, | |
| "grad_norm": 0.20874999463558197, | |
| "learning_rate": 9.416428297933631e-05, | |
| "loss": 1.24, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.7203355432282506, | |
| "grad_norm": 0.21667924523353577, | |
| "learning_rate": 9.41495143379765e-05, | |
| "loss": 1.254, | |
| "step": 2662 | |
| }, | |
| { | |
| "epoch": 0.7208767419834934, | |
| "grad_norm": 0.20849965512752533, | |
| "learning_rate": 9.413472819365237e-05, | |
| "loss": 1.2494, | |
| "step": 2664 | |
| }, | |
| { | |
| "epoch": 0.7214179407387363, | |
| "grad_norm": 0.2131972759962082, | |
| "learning_rate": 9.411992455222585e-05, | |
| "loss": 1.2233, | |
| "step": 2666 | |
| }, | |
| { | |
| "epoch": 0.7219591394939792, | |
| "grad_norm": 0.21590593457221985, | |
| "learning_rate": 9.410510341956579e-05, | |
| "loss": 1.2428, | |
| "step": 2668 | |
| }, | |
| { | |
| "epoch": 0.722500338249222, | |
| "grad_norm": 0.21747298538684845, | |
| "learning_rate": 9.409026480154801e-05, | |
| "loss": 1.2495, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.7230415370044649, | |
| "grad_norm": 0.21579551696777344, | |
| "learning_rate": 9.407540870405523e-05, | |
| "loss": 1.2513, | |
| "step": 2672 | |
| }, | |
| { | |
| "epoch": 0.7235827357597078, | |
| "grad_norm": 0.20697540044784546, | |
| "learning_rate": 9.40605351329771e-05, | |
| "loss": 1.2364, | |
| "step": 2674 | |
| }, | |
| { | |
| "epoch": 0.7241239345149506, | |
| "grad_norm": 0.215818852186203, | |
| "learning_rate": 9.404564409421024e-05, | |
| "loss": 1.2242, | |
| "step": 2676 | |
| }, | |
| { | |
| "epoch": 0.7246651332701934, | |
| "grad_norm": 0.21552613377571106, | |
| "learning_rate": 9.403073559365816e-05, | |
| "loss": 1.2378, | |
| "step": 2678 | |
| }, | |
| { | |
| "epoch": 0.7252063320254364, | |
| "grad_norm": 0.20463980734348297, | |
| "learning_rate": 9.401580963723127e-05, | |
| "loss": 1.2144, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.7257475307806792, | |
| "grad_norm": 0.20748072862625122, | |
| "learning_rate": 9.400086623084696e-05, | |
| "loss": 1.2422, | |
| "step": 2682 | |
| }, | |
| { | |
| "epoch": 0.726288729535922, | |
| "grad_norm": 0.21622253954410553, | |
| "learning_rate": 9.398590538042948e-05, | |
| "loss": 1.2466, | |
| "step": 2684 | |
| }, | |
| { | |
| "epoch": 0.7268299282911649, | |
| "grad_norm": 0.21229557693004608, | |
| "learning_rate": 9.397092709191005e-05, | |
| "loss": 1.2533, | |
| "step": 2686 | |
| }, | |
| { | |
| "epoch": 0.7273711270464078, | |
| "grad_norm": 0.2206655591726303, | |
| "learning_rate": 9.395593137122676e-05, | |
| "loss": 1.2368, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 0.7279123258016507, | |
| "grad_norm": 0.22106198966503143, | |
| "learning_rate": 9.39409182243246e-05, | |
| "loss": 1.2523, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.7284535245568935, | |
| "grad_norm": 0.21155452728271484, | |
| "learning_rate": 9.392588765715554e-05, | |
| "loss": 1.2558, | |
| "step": 2692 | |
| }, | |
| { | |
| "epoch": 0.7289947233121363, | |
| "grad_norm": 0.2205546647310257, | |
| "learning_rate": 9.39108396756784e-05, | |
| "loss": 1.2409, | |
| "step": 2694 | |
| }, | |
| { | |
| "epoch": 0.7295359220673793, | |
| "grad_norm": 0.2159835547208786, | |
| "learning_rate": 9.389577428585888e-05, | |
| "loss": 1.248, | |
| "step": 2696 | |
| }, | |
| { | |
| "epoch": 0.7300771208226221, | |
| "grad_norm": 0.20885945856571198, | |
| "learning_rate": 9.388069149366966e-05, | |
| "loss": 1.2388, | |
| "step": 2698 | |
| }, | |
| { | |
| "epoch": 0.7306183195778649, | |
| "grad_norm": 0.2038174420595169, | |
| "learning_rate": 9.386559130509026e-05, | |
| "loss": 1.2213, | |
| "step": 2700 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 11088, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 300, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.1223849141975122e+20, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
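
The log above follows the usual Hugging Face Trainer checkpoint-state layout: a `log_history` array of per-logging-step records (`epoch`, `grad_norm`, `learning_rate`, `loss`, `step`) plus run-level metadata (`max_steps`, `logging_steps`, `train_batch_size`, and so on). A minimal sketch for inspecting such a file is below; it assumes the JSON is saved as `trainer_state.json` (the filename and output path are hypothetical) and uses only the keys visible in this log.

```python
# Minimal sketch: load a Trainer state file and plot the training loss
# and learning-rate schedule against the global step. Assumes the file
# matches the structure above; "trainer_state.json" and "curves.png"
# are placeholder paths, not part of the original log.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only records that carry a training loss (eval records, if any,
# would use different keys such as "eval_loss").
records = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in records]
losses = [e["loss"] for e in records]
lrs = [e["learning_rate"] for e in records]

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True)
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("global step")
fig.tight_layout()
fig.savefig("curves.png")
```

Read against this particular log, such a plot would show the loss falling from roughly 3.3 at step 1 to about 1.22–1.28 by step 2700 (epoch 0.73 of 3, step 2700 of `max_steps` 11088), with the learning rate past its warmup peak and decaying slowly through ~9.4e-05.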