{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9999795262371272,
  "eval_steps": 500,
  "global_step": 24421,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0010236881436439204,
      "grad_norm": 1.6028783321380615,
      "learning_rate": 5e-05,
      "loss": 1.4176,
      "step": 25
    },
    {
      "epoch": 0.0020473762872878407,
      "grad_norm": 0.7411264777183533,
      "learning_rate": 0.0001,
      "loss": 0.5905,
      "step": 50
    },
    {
      "epoch": 0.003071064430931761,
      "grad_norm": 1.4544100761413574,
      "learning_rate": 9.989741906364122e-05,
      "loss": 0.2869,
      "step": 75
    },
    {
      "epoch": 0.004094752574575681,
      "grad_norm": 0.31396615505218506,
      "learning_rate": 9.979483812728243e-05,
      "loss": 0.23,
      "step": 100
    },
    {
      "epoch": 0.005118440718219602,
      "grad_norm": 0.7768864035606384,
      "learning_rate": 9.969225719092365e-05,
      "loss": 0.2192,
      "step": 125
    },
    {
      "epoch": 0.006142128861863522,
      "grad_norm": 0.3052266836166382,
      "learning_rate": 9.958967625456485e-05,
      "loss": 0.2055,
      "step": 150
    },
    {
      "epoch": 0.007165817005507442,
      "grad_norm": 0.2620660960674286,
      "learning_rate": 9.948709531820606e-05,
      "loss": 0.1947,
      "step": 175
    },
    {
      "epoch": 0.008189505149151363,
      "grad_norm": 0.31376323103904724,
      "learning_rate": 9.938451438184728e-05,
      "loss": 0.1904,
      "step": 200
    },
    {
      "epoch": 0.009213193292795283,
      "grad_norm": 0.30490225553512573,
      "learning_rate": 9.92819334454885e-05,
      "loss": 0.1953,
      "step": 225
    },
    {
      "epoch": 0.010236881436439204,
      "grad_norm": 0.33286434412002563,
      "learning_rate": 9.917935250912971e-05,
      "loss": 0.1928,
      "step": 250
    },
    {
      "epoch": 0.011260569580083124,
      "grad_norm": 0.36407458782196045,
      "learning_rate": 9.907677157277092e-05,
      "loss": 0.1771,
      "step": 275
    },
    {
      "epoch": 0.012284257723727043,
      "grad_norm": 0.3681598901748657,
      "learning_rate": 9.897419063641214e-05,
      "loss": 0.1858,
      "step": 300
    },
    {
      "epoch": 0.013307945867370965,
      "grad_norm": 0.26045089960098267,
      "learning_rate": 9.887160970005335e-05,
      "loss": 0.1759,
      "step": 325
    },
    {
      "epoch": 0.014331634011014885,
      "grad_norm": 0.6006647348403931,
      "learning_rate": 9.876902876369455e-05,
      "loss": 0.1698,
      "step": 350
    },
    {
      "epoch": 0.015355322154658804,
      "grad_norm": 0.5427098870277405,
      "learning_rate": 9.866644782733577e-05,
      "loss": 0.1769,
      "step": 375
    },
    {
      "epoch": 0.016379010298302726,
      "grad_norm": 0.37887805700302124,
      "learning_rate": 9.856386689097698e-05,
      "loss": 0.1772,
      "step": 400
    },
    {
      "epoch": 0.017402698441946644,
      "grad_norm": 0.31953874230384827,
      "learning_rate": 9.84612859546182e-05,
      "loss": 0.1776,
      "step": 425
    },
    {
      "epoch": 0.018426386585590565,
      "grad_norm": 0.2421693652868271,
      "learning_rate": 9.835870501825941e-05,
      "loss": 0.1731,
      "step": 450
    },
    {
      "epoch": 0.019450074729234487,
      "grad_norm": 0.445103257894516,
      "learning_rate": 9.825612408190063e-05,
      "loss": 0.1664,
      "step": 475
    },
    {
      "epoch": 0.020473762872878408,
      "grad_norm": 0.3354673683643341,
      "learning_rate": 9.815354314554184e-05,
      "loss": 0.1749,
      "step": 500
    },
    {
      "epoch": 0.021497451016522326,
      "grad_norm": 0.24303950369358063,
      "learning_rate": 9.805096220918306e-05,
      "loss": 0.1655,
      "step": 525
    },
    {
      "epoch": 0.022521139160166247,
      "grad_norm": 0.2817317247390747,
      "learning_rate": 9.794838127282426e-05,
      "loss": 0.1673,
      "step": 550
    },
    {
      "epoch": 0.02354482730381017,
      "grad_norm": 0.4167644679546356,
      "learning_rate": 9.784580033646547e-05,
      "loss": 0.1638,
      "step": 575
    },
    {
      "epoch": 0.024568515447454087,
      "grad_norm": 0.339884877204895,
      "learning_rate": 9.774321940010669e-05,
      "loss": 0.1609,
      "step": 600
    },
    {
      "epoch": 0.025592203591098008,
      "grad_norm": 0.2876884937286377,
      "learning_rate": 9.76406384637479e-05,
      "loss": 0.1659,
      "step": 625
    },
    {
      "epoch": 0.02661589173474193,
      "grad_norm": 0.3253774642944336,
      "learning_rate": 9.753805752738912e-05,
      "loss": 0.1557,
      "step": 650
    },
    {
      "epoch": 0.027639579878385848,
      "grad_norm": 0.24355168640613556,
      "learning_rate": 9.743547659103033e-05,
      "loss": 0.167,
      "step": 675
    },
    {
      "epoch": 0.02866326802202977,
      "grad_norm": 0.3610304892063141,
      "learning_rate": 9.733289565467155e-05,
      "loss": 0.1605,
      "step": 700
    },
    {
      "epoch": 0.02968695616567369,
      "grad_norm": 0.3692477345466614,
      "learning_rate": 9.723031471831276e-05,
      "loss": 0.1546,
      "step": 725
    },
    {
      "epoch": 0.03071064430931761,
      "grad_norm": 0.63128262758255,
      "learning_rate": 9.712773378195396e-05,
      "loss": 0.1652,
      "step": 750
    },
    {
      "epoch": 0.031734332452961526,
      "grad_norm": 0.3006066381931305,
      "learning_rate": 9.702515284559518e-05,
      "loss": 0.1596,
      "step": 775
    },
    {
      "epoch": 0.03275802059660545,
      "grad_norm": 0.6283088326454163,
      "learning_rate": 9.692257190923639e-05,
      "loss": 0.1535,
      "step": 800
    },
    {
      "epoch": 0.03378170874024937,
      "grad_norm": 0.4018152356147766,
      "learning_rate": 9.68199909728776e-05,
      "loss": 0.1603,
      "step": 825
    },
    {
      "epoch": 0.03480539688389329,
      "grad_norm": 0.36612433195114136,
      "learning_rate": 9.671741003651882e-05,
      "loss": 0.1524,
      "step": 850
    },
    {
      "epoch": 0.03582908502753721,
      "grad_norm": 0.41699767112731934,
      "learning_rate": 9.661482910016003e-05,
      "loss": 0.1547,
      "step": 875
    },
    {
      "epoch": 0.03685277317118113,
      "grad_norm": 0.25239649415016174,
      "learning_rate": 9.651224816380125e-05,
      "loss": 0.161,
      "step": 900
    },
    {
      "epoch": 0.037876461314825055,
      "grad_norm": 0.28330907225608826,
      "learning_rate": 9.640966722744246e-05,
      "loss": 0.1544,
      "step": 925
    },
    {
      "epoch": 0.03890014945846897,
      "grad_norm": 0.393118292093277,
      "learning_rate": 9.630708629108367e-05,
      "loss": 0.1529,
      "step": 950
    },
    {
      "epoch": 0.03992383760211289,
      "grad_norm": 0.2385636419057846,
      "learning_rate": 9.620450535472488e-05,
      "loss": 0.1573,
      "step": 975
    },
    {
      "epoch": 0.040947525745756816,
      "grad_norm": 0.7217739820480347,
      "learning_rate": 9.61019244183661e-05,
      "loss": 0.1625,
      "step": 1000
    },
    {
      "epoch": 0.041971213889400734,
      "grad_norm": 0.4201323091983795,
      "learning_rate": 9.599934348200731e-05,
      "loss": 0.1554,
      "step": 1025
    },
    {
      "epoch": 0.04299490203304465,
      "grad_norm": 0.2981342375278473,
      "learning_rate": 9.589676254564852e-05,
      "loss": 0.1606,
      "step": 1050
    },
    {
      "epoch": 0.04401859017668858,
      "grad_norm": 0.32909801602363586,
      "learning_rate": 9.579418160928974e-05,
      "loss": 0.1611,
      "step": 1075
    },
    {
      "epoch": 0.045042278320332495,
      "grad_norm": 0.3763565719127655,
      "learning_rate": 9.569160067293095e-05,
      "loss": 0.1509,
      "step": 1100
    },
    {
      "epoch": 0.04606596646397641,
      "grad_norm": 0.26296111941337585,
      "learning_rate": 9.558901973657215e-05,
      "loss": 0.1458,
      "step": 1125
    },
    {
      "epoch": 0.04708965460762034,
      "grad_norm": 0.2251584380865097,
      "learning_rate": 9.548643880021337e-05,
      "loss": 0.1483,
      "step": 1150
    },
    {
      "epoch": 0.048113342751264256,
      "grad_norm": 0.24623946845531464,
      "learning_rate": 9.538385786385458e-05,
      "loss": 0.1521,
      "step": 1175
    },
    {
      "epoch": 0.049137030894908174,
      "grad_norm": 0.45473411679267883,
      "learning_rate": 9.52812769274958e-05,
      "loss": 0.1549,
      "step": 1200
    },
    {
      "epoch": 0.0501607190385521,
      "grad_norm": 0.23999722301959991,
      "learning_rate": 9.517869599113701e-05,
      "loss": 0.1442,
      "step": 1225
    },
    {
      "epoch": 0.051184407182196016,
      "grad_norm": 0.32882001996040344,
      "learning_rate": 9.507611505477823e-05,
      "loss": 0.153,
      "step": 1250
    },
    {
      "epoch": 0.052208095325839934,
      "grad_norm": 0.44401663541793823,
      "learning_rate": 9.497353411841944e-05,
      "loss": 0.1521,
      "step": 1275
    },
    {
      "epoch": 0.05323178346948386,
      "grad_norm": 0.2603824734687805,
      "learning_rate": 9.487095318206066e-05,
      "loss": 0.1543,
      "step": 1300
    },
    {
      "epoch": 0.05425547161312778,
      "grad_norm": 0.28304556012153625,
      "learning_rate": 9.476837224570186e-05,
      "loss": 0.1491,
      "step": 1325
    },
    {
      "epoch": 0.055279159756771695,
      "grad_norm": 0.40350213646888733,
      "learning_rate": 9.466579130934307e-05,
      "loss": 0.1431,
      "step": 1350
    },
    {
      "epoch": 0.05630284790041562,
      "grad_norm": 0.3348640501499176,
      "learning_rate": 9.456321037298429e-05,
      "loss": 0.1439,
      "step": 1375
    },
    {
      "epoch": 0.05732653604405954,
      "grad_norm": 0.3141482472419739,
      "learning_rate": 9.44606294366255e-05,
      "loss": 0.148,
      "step": 1400
    },
    {
      "epoch": 0.058350224187703456,
      "grad_norm": 0.2608078420162201,
      "learning_rate": 9.435804850026672e-05,
      "loss": 0.1461,
      "step": 1425
    },
    {
      "epoch": 0.05937391233134738,
      "grad_norm": 0.2971978485584259,
      "learning_rate": 9.425546756390793e-05,
      "loss": 0.1435,
      "step": 1450
    },
    {
      "epoch": 0.0603976004749913,
      "grad_norm": 0.33824801445007324,
      "learning_rate": 9.415288662754915e-05,
      "loss": 0.1476,
      "step": 1475
    },
    {
      "epoch": 0.06142128861863522,
      "grad_norm": 0.22219249606132507,
      "learning_rate": 9.405030569119036e-05,
      "loss": 0.1443,
      "step": 1500
    },
    {
      "epoch": 0.06244497676227914,
      "grad_norm": 0.30279237031936646,
      "learning_rate": 9.394772475483156e-05,
      "loss": 0.1451,
      "step": 1525
    },
    {
      "epoch": 0.06346866490592305,
      "grad_norm": 0.7361096739768982,
      "learning_rate": 9.384514381847278e-05,
      "loss": 0.139,
      "step": 1550
    },
    {
      "epoch": 0.06449235304956698,
      "grad_norm": 0.2694852650165558,
      "learning_rate": 9.374256288211399e-05,
      "loss": 0.14,
      "step": 1575
    },
    {
      "epoch": 0.0655160411932109,
      "grad_norm": 0.2227030247449875,
      "learning_rate": 9.36399819457552e-05,
      "loss": 0.1409,
      "step": 1600
    },
    {
      "epoch": 0.06653972933685481,
      "grad_norm": 0.3561594486236572,
      "learning_rate": 9.353740100939642e-05,
      "loss": 0.1386,
      "step": 1625
    },
    {
      "epoch": 0.06756341748049874,
      "grad_norm": 0.3476031720638275,
      "learning_rate": 9.343482007303764e-05,
      "loss": 0.1397,
      "step": 1650
    },
    {
      "epoch": 0.06858710562414266,
      "grad_norm": 0.3784942626953125,
      "learning_rate": 9.333223913667885e-05,
      "loss": 0.1437,
      "step": 1675
    },
    {
      "epoch": 0.06961079376778657,
      "grad_norm": 0.38352203369140625,
      "learning_rate": 9.322965820032006e-05,
      "loss": 0.1345,
      "step": 1700
    },
    {
      "epoch": 0.0706344819114305,
      "grad_norm": 0.2508692741394043,
      "learning_rate": 9.312707726396127e-05,
      "loss": 0.1396,
      "step": 1725
    },
    {
      "epoch": 0.07165817005507442,
      "grad_norm": 0.5086421966552734,
      "learning_rate": 9.302449632760248e-05,
      "loss": 0.1377,
      "step": 1750
    },
    {
      "epoch": 0.07268185819871835,
      "grad_norm": 0.40866467356681824,
      "learning_rate": 9.29219153912437e-05,
      "loss": 0.1347,
      "step": 1775
    },
    {
      "epoch": 0.07370554634236226,
      "grad_norm": 0.3897942304611206,
      "learning_rate": 9.281933445488491e-05,
      "loss": 0.1317,
      "step": 1800
    },
    {
      "epoch": 0.07472923448600619,
      "grad_norm": 0.3895871937274933,
      "learning_rate": 9.271675351852612e-05,
      "loss": 0.1415,
      "step": 1825
    },
    {
      "epoch": 0.07575292262965011,
      "grad_norm": 0.3118538558483124,
      "learning_rate": 9.261417258216734e-05,
      "loss": 0.1276,
      "step": 1850
    },
    {
      "epoch": 0.07677661077329402,
      "grad_norm": 0.5007463097572327,
      "learning_rate": 9.251159164580855e-05,
      "loss": 0.1401,
      "step": 1875
    },
    {
      "epoch": 0.07780029891693795,
      "grad_norm": 0.37419870495796204,
      "learning_rate": 9.240901070944977e-05,
      "loss": 0.1338,
      "step": 1900
    },
    {
      "epoch": 0.07882398706058187,
      "grad_norm": 0.4792192876338959,
      "learning_rate": 9.230642977309097e-05,
      "loss": 0.1343,
      "step": 1925
    },
    {
      "epoch": 0.07984767520422578,
      "grad_norm": 0.7688687443733215,
      "learning_rate": 9.220384883673218e-05,
      "loss": 0.133,
      "step": 1950
    },
    {
      "epoch": 0.0808713633478697,
      "grad_norm": 0.8818038105964661,
      "learning_rate": 9.21012679003734e-05,
      "loss": 0.1354,
      "step": 1975
    },
    {
      "epoch": 0.08189505149151363,
      "grad_norm": 0.4251585304737091,
      "learning_rate": 9.199868696401461e-05,
      "loss": 0.1301,
      "step": 2000
    },
    {
      "epoch": 0.08291873963515754,
      "grad_norm": 0.33509576320648193,
      "learning_rate": 9.189610602765583e-05,
      "loss": 0.1336,
      "step": 2025
    },
    {
      "epoch": 0.08394242777880147,
      "grad_norm": 0.1928907036781311,
      "learning_rate": 9.179352509129704e-05,
      "loss": 0.1355,
      "step": 2050
    },
    {
      "epoch": 0.08496611592244539,
      "grad_norm": 0.2787665128707886,
      "learning_rate": 9.169094415493826e-05,
      "loss": 0.1314,
      "step": 2075
    },
    {
      "epoch": 0.0859898040660893,
      "grad_norm": 0.4015423357486725,
      "learning_rate": 9.158836321857946e-05,
      "loss": 0.1303,
      "step": 2100
    },
    {
      "epoch": 0.08701349220973323,
      "grad_norm": 0.579844057559967,
      "learning_rate": 9.148578228222067e-05,
      "loss": 0.1277,
      "step": 2125
    },
    {
      "epoch": 0.08803718035337715,
      "grad_norm": 0.3636709153652191,
      "learning_rate": 9.138320134586189e-05,
      "loss": 0.128,
      "step": 2150
    },
    {
      "epoch": 0.08906086849702106,
      "grad_norm": 0.25872743129730225,
      "learning_rate": 9.12806204095031e-05,
      "loss": 0.1312,
      "step": 2175
    },
    {
      "epoch": 0.09008455664066499,
      "grad_norm": 0.32024118304252625,
      "learning_rate": 9.117803947314432e-05,
      "loss": 0.1295,
      "step": 2200
    },
    {
      "epoch": 0.09110824478430891,
      "grad_norm": 0.23083104193210602,
      "learning_rate": 9.107545853678553e-05,
      "loss": 0.1292,
      "step": 2225
    },
    {
      "epoch": 0.09213193292795283,
      "grad_norm": 0.27154719829559326,
      "learning_rate": 9.097287760042675e-05,
      "loss": 0.1275,
      "step": 2250
    },
    {
      "epoch": 0.09315562107159675,
      "grad_norm": 0.29432374238967896,
      "learning_rate": 9.087029666406796e-05,
      "loss": 0.1246,
      "step": 2275
    },
    {
      "epoch": 0.09417930921524068,
      "grad_norm": 0.5287219882011414,
      "learning_rate": 9.076771572770916e-05,
      "loss": 0.1293,
      "step": 2300
    },
    {
      "epoch": 0.09520299735888459,
      "grad_norm": 0.3348105549812317,
      "learning_rate": 9.066513479135038e-05,
      "loss": 0.1226,
      "step": 2325
    },
    {
      "epoch": 0.09622668550252851,
      "grad_norm": 0.2081725001335144,
      "learning_rate": 9.056255385499159e-05,
      "loss": 0.1242,
      "step": 2350
    },
    {
      "epoch": 0.09725037364617244,
      "grad_norm": 0.27878373861312866,
      "learning_rate": 9.04599729186328e-05,
      "loss": 0.1343,
      "step": 2375
    },
    {
      "epoch": 0.09827406178981635,
      "grad_norm": 0.40117210149765015,
      "learning_rate": 9.035739198227402e-05,
      "loss": 0.1265,
      "step": 2400
    },
    {
      "epoch": 0.09929774993346027,
      "grad_norm": 0.46459710597991943,
      "learning_rate": 9.025481104591524e-05,
      "loss": 0.1218,
      "step": 2425
    },
    {
      "epoch": 0.1003214380771042,
      "grad_norm": 0.19930683076381683,
      "learning_rate": 9.015223010955645e-05,
      "loss": 0.1258,
      "step": 2450
    },
    {
      "epoch": 0.10134512622074811,
      "grad_norm": 0.3851957321166992,
      "learning_rate": 9.004964917319766e-05,
      "loss": 0.1243,
      "step": 2475
    },
    {
      "epoch": 0.10236881436439203,
      "grad_norm": 0.3303160071372986,
      "learning_rate": 8.994706823683887e-05,
      "loss": 0.1264,
      "step": 2500
    },
    {
      "epoch": 0.10339250250803596,
      "grad_norm": 0.3450019359588623,
      "learning_rate": 8.984448730048008e-05,
      "loss": 0.122,
      "step": 2525
    },
    {
      "epoch": 0.10441619065167987,
      "grad_norm": 0.36742231249809265,
      "learning_rate": 8.97419063641213e-05,
      "loss": 0.1216,
      "step": 2550
    },
    {
      "epoch": 0.1054398787953238,
      "grad_norm": 0.2524435222148895,
      "learning_rate": 8.963932542776251e-05,
      "loss": 0.1238,
      "step": 2575
    },
    {
      "epoch": 0.10646356693896772,
      "grad_norm": 0.38917961716651917,
      "learning_rate": 8.953674449140372e-05,
      "loss": 0.1252,
      "step": 2600
    },
    {
      "epoch": 0.10748725508261163,
      "grad_norm": 0.3554433584213257,
      "learning_rate": 8.943416355504494e-05,
      "loss": 0.1213,
      "step": 2625
    },
    {
      "epoch": 0.10851094322625555,
      "grad_norm": 0.2701007127761841,
      "learning_rate": 8.933158261868615e-05,
      "loss": 0.1255,
      "step": 2650
    },
    {
      "epoch": 0.10953463136989948,
      "grad_norm": 0.40730130672454834,
      "learning_rate": 8.922900168232737e-05,
      "loss": 0.1205,
      "step": 2675
    },
    {
      "epoch": 0.11055831951354339,
      "grad_norm": 0.36011001467704773,
      "learning_rate": 8.912642074596857e-05,
      "loss": 0.1208,
      "step": 2700
    },
    {
      "epoch": 0.11158200765718732,
      "grad_norm": 0.2509096562862396,
      "learning_rate": 8.902383980960978e-05,
      "loss": 0.1234,
      "step": 2725
    },
    {
      "epoch": 0.11260569580083124,
      "grad_norm": 0.34861189126968384,
      "learning_rate": 8.8921258873251e-05,
      "loss": 0.1306,
      "step": 2750
    },
    {
      "epoch": 0.11362938394447515,
      "grad_norm": 0.20540310442447662,
      "learning_rate": 8.881867793689221e-05,
      "loss": 0.1174,
      "step": 2775
    },
    {
      "epoch": 0.11465307208811908,
      "grad_norm": 0.26270365715026855,
      "learning_rate": 8.871609700053343e-05,
      "loss": 0.1299,
      "step": 2800
    },
    {
      "epoch": 0.115676760231763,
      "grad_norm": 0.5314069986343384,
      "learning_rate": 8.861351606417464e-05,
      "loss": 0.1193,
      "step": 2825
    },
    {
      "epoch": 0.11670044837540691,
      "grad_norm": 0.26417431235313416,
      "learning_rate": 8.851093512781586e-05,
      "loss": 0.1221,
      "step": 2850
    },
    {
      "epoch": 0.11772413651905084,
      "grad_norm": 0.2860862612724304,
      "learning_rate": 8.840835419145706e-05,
      "loss": 0.1273,
      "step": 2875
    },
    {
      "epoch": 0.11874782466269476,
      "grad_norm": 0.27751094102859497,
      "learning_rate": 8.830577325509827e-05,
      "loss": 0.1206,
      "step": 2900
    },
    {
      "epoch": 0.11977151280633867,
      "grad_norm": 0.45580488443374634,
      "learning_rate": 8.820319231873949e-05,
      "loss": 0.1187,
      "step": 2925
    },
    {
      "epoch": 0.1207952009499826,
      "grad_norm": 0.2574482560157776,
      "learning_rate": 8.81006113823807e-05,
      "loss": 0.1229,
      "step": 2950
    },
    {
      "epoch": 0.12181888909362652,
      "grad_norm": 0.2733965516090393,
      "learning_rate": 8.799803044602192e-05,
      "loss": 0.1191,
      "step": 2975
    },
    {
      "epoch": 0.12284257723727043,
      "grad_norm": 0.2117166668176651,
      "learning_rate": 8.789544950966313e-05,
      "loss": 0.1205,
      "step": 3000
    },
    {
      "epoch": 0.12386626538091436,
      "grad_norm": 0.5137503147125244,
      "learning_rate": 8.779286857330435e-05,
      "loss": 0.1264,
      "step": 3025
    },
    {
      "epoch": 0.12488995352455828,
      "grad_norm": 0.23070771992206573,
      "learning_rate": 8.769028763694556e-05,
      "loss": 0.118,
      "step": 3050
    },
    {
      "epoch": 0.1259136416682022,
      "grad_norm": 0.2723982334136963,
      "learning_rate": 8.758770670058676e-05,
      "loss": 0.1152,
      "step": 3075
    },
    {
      "epoch": 0.1269373298118461,
      "grad_norm": 0.3011278212070465,
      "learning_rate": 8.748512576422798e-05,
      "loss": 0.1187,
      "step": 3100
    },
    {
      "epoch": 0.12796101795549003,
      "grad_norm": 0.22801214456558228,
      "learning_rate": 8.738254482786919e-05,
      "loss": 0.1182,
      "step": 3125
    },
    {
      "epoch": 0.12898470609913396,
      "grad_norm": 0.3295694589614868,
      "learning_rate": 8.72799638915104e-05,
      "loss": 0.1213,
      "step": 3150
    },
    {
      "epoch": 0.13000839424277788,
      "grad_norm": 0.34608685970306396,
      "learning_rate": 8.717738295515162e-05,
      "loss": 0.1199,
      "step": 3175
    },
    {
      "epoch": 0.1310320823864218,
      "grad_norm": 0.5989237427711487,
      "learning_rate": 8.707480201879284e-05,
      "loss": 0.1173,
      "step": 3200
    },
    {
      "epoch": 0.13205577053006573,
      "grad_norm": 0.3048112094402313,
      "learning_rate": 8.697222108243405e-05,
      "loss": 0.1178,
      "step": 3225
    },
    {
      "epoch": 0.13307945867370963,
      "grad_norm": 0.3791589140892029,
      "learning_rate": 8.686964014607527e-05,
      "loss": 0.1175,
      "step": 3250
    },
    {
      "epoch": 0.13410314681735355,
      "grad_norm": 0.1966562420129776,
      "learning_rate": 8.676705920971647e-05,
      "loss": 0.1192,
      "step": 3275
    },
    {
      "epoch": 0.13512683496099748,
      "grad_norm": 0.36613497138023376,
      "learning_rate": 8.666447827335768e-05,
      "loss": 0.1167,
      "step": 3300
    },
    {
      "epoch": 0.1361505231046414,
      "grad_norm": 0.35663649439811707,
      "learning_rate": 8.65618973369989e-05,
      "loss": 0.122,
      "step": 3325
    },
    {
      "epoch": 0.13717421124828533,
      "grad_norm": 0.2863902151584625,
      "learning_rate": 8.645931640064011e-05,
      "loss": 0.1157,
      "step": 3350
    },
    {
      "epoch": 0.13819789939192925,
      "grad_norm": 0.3368700444698334,
      "learning_rate": 8.635673546428133e-05,
      "loss": 0.1174,
      "step": 3375
    },
    {
      "epoch": 0.13922158753557315,
      "grad_norm": 0.3548611104488373,
      "learning_rate": 8.625415452792254e-05,
      "loss": 0.118,
      "step": 3400
    },
    {
      "epoch": 0.14024527567921707,
      "grad_norm": 0.25708600878715515,
      "learning_rate": 8.615157359156375e-05,
      "loss": 0.1119,
      "step": 3425
    },
    {
      "epoch": 0.141268963822861,
      "grad_norm": 0.24036449193954468,
      "learning_rate": 8.604899265520497e-05,
      "loss": 0.115,
      "step": 3450
    },
    {
      "epoch": 0.14229265196650492,
      "grad_norm": 0.45417720079421997,
      "learning_rate": 8.594641171884617e-05,
      "loss": 0.1199,
      "step": 3475
    },
    {
      "epoch": 0.14331634011014885,
      "grad_norm": 0.28222933411598206,
      "learning_rate": 8.584383078248738e-05,
      "loss": 0.113,
      "step": 3500
    },
    {
      "epoch": 0.14434002825379277,
      "grad_norm": 0.2157520204782486,
      "learning_rate": 8.57412498461286e-05,
      "loss": 0.1146,
      "step": 3525
    },
    {
      "epoch": 0.1453637163974367,
      "grad_norm": 0.3632587790489197,
      "learning_rate": 8.563866890976981e-05,
      "loss": 0.1174,
      "step": 3550
    },
    {
      "epoch": 0.1463874045410806,
      "grad_norm": 0.23103779554367065,
      "learning_rate": 8.553608797341103e-05,
      "loss": 0.1111,
      "step": 3575
    },
    {
      "epoch": 0.14741109268472452,
      "grad_norm": 0.316450297832489,
      "learning_rate": 8.543350703705224e-05,
      "loss": 0.1148,
      "step": 3600
    },
    {
      "epoch": 0.14843478082836845,
      "grad_norm": 0.2546501159667969,
      "learning_rate": 8.533092610069346e-05,
      "loss": 0.1116,
      "step": 3625
    },
    {
      "epoch": 0.14945846897201237,
      "grad_norm": 0.5451907515525818,
      "learning_rate": 8.522834516433467e-05,
      "loss": 0.1154,
      "step": 3650
    },
    {
      "epoch": 0.1504821571156563,
      "grad_norm": 0.3568204939365387,
      "learning_rate": 8.512576422797587e-05,
      "loss": 0.1152,
      "step": 3675
    },
    {
      "epoch": 0.15150584525930022,
      "grad_norm": 0.22811046242713928,
      "learning_rate": 8.502318329161709e-05,
      "loss": 0.1164,
      "step": 3700
    },
    {
      "epoch": 0.15252953340294412,
      "grad_norm": 0.2431710660457611,
      "learning_rate": 8.49206023552583e-05,
      "loss": 0.1127,
      "step": 3725
    },
    {
      "epoch": 0.15355322154658804,
      "grad_norm": 0.27546626329421997,
      "learning_rate": 8.481802141889952e-05,
      "loss": 0.1174,
      "step": 3750
    },
    {
      "epoch": 0.15457690969023197,
      "grad_norm": 0.23295095562934875,
      "learning_rate": 8.471544048254073e-05,
      "loss": 0.1124,
      "step": 3775
    },
    {
      "epoch": 0.1556005978338759,
      "grad_norm": 0.2244202196598053,
      "learning_rate": 8.461285954618195e-05,
      "loss": 0.1104,
      "step": 3800
    },
    {
      "epoch": 0.15662428597751982,
      "grad_norm": 0.19517052173614502,
      "learning_rate": 8.451027860982316e-05,
      "loss": 0.1114,
      "step": 3825
    },
    {
      "epoch": 0.15764797412116374,
      "grad_norm": 0.26743006706237793,
      "learning_rate": 8.440769767346436e-05,
      "loss": 0.1112,
      "step": 3850
    },
    {
      "epoch": 0.15867166226480764,
      "grad_norm": 0.26785147190093994,
      "learning_rate": 8.430511673710558e-05,
      "loss": 0.1126,
      "step": 3875
    },
    {
      "epoch": 0.15969535040845156,
      "grad_norm": 0.2772103250026703,
      "learning_rate": 8.420253580074679e-05,
      "loss": 0.1137,
      "step": 3900
    },
    {
      "epoch": 0.1607190385520955,
      "grad_norm": 0.268732488155365,
      "learning_rate": 8.409995486438801e-05,
      "loss": 0.1148,
      "step": 3925
    },
    {
      "epoch": 0.1617427266957394,
      "grad_norm": 0.42661407589912415,
      "learning_rate": 8.399737392802922e-05,
      "loss": 0.1147,
      "step": 3950
    },
    {
      "epoch": 0.16276641483938334,
      "grad_norm": 0.2644007205963135,
      "learning_rate": 8.389479299167044e-05,
      "loss": 0.1179,
      "step": 3975
    },
    {
      "epoch": 0.16379010298302726,
      "grad_norm": 0.4172644019126892,
      "learning_rate": 8.379221205531165e-05,
      "loss": 0.1167,
      "step": 4000
    },
    {
      "epoch": 0.16481379112667116,
      "grad_norm": 0.2200649082660675,
      "learning_rate": 8.368963111895287e-05,
      "loss": 0.1154,
      "step": 4025
    },
    {
      "epoch": 0.16583747927031509,
      "grad_norm": 0.3296594023704529,
      "learning_rate": 8.358705018259407e-05,
      "loss": 0.1136,
      "step": 4050
    },
    {
      "epoch": 0.166861167413959,
      "grad_norm": 0.2407379001379013,
      "learning_rate": 8.348446924623528e-05,
      "loss": 0.1127,
      "step": 4075
    },
    {
      "epoch": 0.16788485555760294,
      "grad_norm": 0.19917023181915283,
      "learning_rate": 8.33818883098765e-05,
      "loss": 0.1167,
      "step": 4100
    },
    {
      "epoch": 0.16890854370124686,
      "grad_norm": 0.2644532024860382,
      "learning_rate": 8.327930737351771e-05,
      "loss": 0.1156,
      "step": 4125
    },
    {
      "epoch": 0.16993223184489079,
      "grad_norm": 0.22355978190898895,
      "learning_rate": 8.317672643715893e-05,
      "loss": 0.113,
      "step": 4150
    },
    {
      "epoch": 0.17095591998853468,
      "grad_norm": 0.3826581835746765,
      "learning_rate": 8.307414550080014e-05,
      "loss": 0.1172,
      "step": 4175
    },
    {
      "epoch": 0.1719796081321786,
      "grad_norm": 0.2284521907567978,
      "learning_rate": 8.297156456444136e-05,
      "loss": 0.1135,
      "step": 4200
    },
    {
      "epoch": 0.17300329627582253,
      "grad_norm": 0.2520081400871277,
      "learning_rate": 8.286898362808257e-05,
      "loss": 0.1146,
      "step": 4225
    },
    {
      "epoch": 0.17402698441946646,
      "grad_norm": 0.385019451379776,
      "learning_rate": 8.276640269172377e-05,
      "loss": 0.1102,
      "step": 4250
    },
    {
      "epoch": 0.17505067256311038,
      "grad_norm": 0.24445098638534546,
      "learning_rate": 8.266382175536499e-05,
      "loss": 0.1124,
      "step": 4275
    },
    {
      "epoch": 0.1760743607067543,
      "grad_norm": 0.24673700332641602,
      "learning_rate": 8.25612408190062e-05,
      "loss": 0.112,
      "step": 4300
    },
    {
      "epoch": 0.1770980488503982,
      "grad_norm": 0.2432449609041214,
      "learning_rate": 8.245865988264741e-05,
      "loss": 0.1121,
      "step": 4325
    },
    {
      "epoch": 0.17812173699404213,
      "grad_norm": 0.3263969123363495,
      "learning_rate": 8.235607894628863e-05,
      "loss": 0.1131,
      "step": 4350
    },
    {
      "epoch": 0.17914542513768605,
      "grad_norm": 0.25198620557785034,
      "learning_rate": 8.225349800992984e-05,
      "loss": 0.1106,
      "step": 4375
    },
    {
      "epoch": 0.18016911328132998,
      "grad_norm": 0.31025946140289307,
      "learning_rate": 8.215091707357106e-05,
      "loss": 0.1083,
      "step": 4400
    },
    {
      "epoch": 0.1811928014249739,
      "grad_norm": 0.2822698950767517,
      "learning_rate": 8.204833613721227e-05,
      "loss": 0.1128,
      "step": 4425
    },
    {
      "epoch": 0.18221648956861783,
      "grad_norm": 0.35102951526641846,
      "learning_rate": 8.194575520085347e-05,
      "loss": 0.1119,
      "step": 4450
    },
    {
      "epoch": 0.18324017771226173,
      "grad_norm": 0.2636832892894745,
      "learning_rate": 8.184317426449469e-05,
      "loss": 0.1154,
      "step": 4475
    },
    {
      "epoch": 0.18426386585590565,
      "grad_norm": 0.2501748204231262,
      "learning_rate": 8.17405933281359e-05,
      "loss": 0.1146,
      "step": 4500
    },
    {
      "epoch": 0.18528755399954958,
      "grad_norm": 0.24221724271774292,
      "learning_rate": 8.163801239177712e-05,
      "loss": 0.1111,
      "step": 4525
    },
    {
      "epoch": 0.1863112421431935,
      "grad_norm": 0.23959171772003174,
      "learning_rate": 8.153543145541833e-05,
      "loss": 0.1121,
      "step": 4550
    },
    {
      "epoch": 0.18733493028683743,
      "grad_norm": 0.28256523609161377,
      "learning_rate": 8.143285051905955e-05,
      "loss": 0.1056,
      "step": 4575
    },
    {
      "epoch": 0.18835861843048135,
      "grad_norm": 0.1967180222272873,
      "learning_rate": 8.133026958270076e-05,
      "loss": 0.1105,
      "step": 4600
    },
    {
      "epoch": 0.18938230657412525,
      "grad_norm": 0.25965237617492676,
      "learning_rate": 8.122768864634198e-05,
      "loss": 0.1082,
      "step": 4625
    },
    {
      "epoch": 0.19040599471776917,
      "grad_norm": 0.2722185552120209,
      "learning_rate": 8.112510770998318e-05,
      "loss": 0.1134,
      "step": 4650
    },
    {
      "epoch": 0.1914296828614131,
      "grad_norm": 0.24172380566596985,
      "learning_rate": 8.102252677362439e-05,
      "loss": 0.1138,
      "step": 4675
    },
    {
      "epoch": 0.19245337100505702,
      "grad_norm": 0.26783162355422974,
      "learning_rate": 8.091994583726561e-05,
      "loss": 0.1079,
      "step": 4700
    },
    {
      "epoch": 0.19347705914870095,
      "grad_norm": 0.2563905715942383,
      "learning_rate": 8.081736490090682e-05,
      "loss": 0.1097,
      "step": 4725
    },
    {
      "epoch": 0.19450074729234487,
      "grad_norm": 0.31813859939575195,
      "learning_rate": 8.071478396454804e-05,
      "loss": 0.1107,
      "step": 4750
    },
    {
      "epoch": 0.19552443543598877,
      "grad_norm": 0.2353924810886383,
      "learning_rate": 8.061220302818925e-05,
      "loss": 0.1112,
      "step": 4775
    },
    {
      "epoch": 0.1965481235796327,
      "grad_norm": 0.24150237441062927,
      "learning_rate": 8.050962209183047e-05,
      "loss": 0.1073,
      "step": 4800
    },
    {
      "epoch": 0.19757181172327662,
      "grad_norm": 0.31365466117858887,
      "learning_rate": 8.040704115547167e-05,
      "loss": 0.1091,
      "step": 4825
    },
    {
      "epoch": 0.19859549986692054,
      "grad_norm": 0.3214346468448639,
      "learning_rate": 8.030446021911288e-05,
      "loss": 0.1122,
      "step": 4850
    },
    {
      "epoch": 0.19961918801056447,
      "grad_norm": 0.2675853967666626,
      "learning_rate": 8.02018792827541e-05,
      "loss": 0.1078,
      "step": 4875
    },
    {
      "epoch": 0.2006428761542084,
      "grad_norm": 0.2487669289112091,
      "learning_rate": 8.009929834639531e-05,
      "loss": 0.1087,
      "step": 4900
    },
    {
      "epoch": 0.2016665642978523,
      "grad_norm": 0.23890641331672668,
      "learning_rate": 7.999671741003653e-05,
      "loss": 0.1143,
      "step": 4925
    },
    {
      "epoch": 0.20269025244149622,
      "grad_norm": 0.25644829869270325,
      "learning_rate": 7.989413647367774e-05,
      "loss": 0.1117,
      "step": 4950
    },
    {
      "epoch": 0.20371394058514014,
      "grad_norm": 0.24456225335597992,
      "learning_rate": 7.979155553731896e-05,
      "loss": 0.115,
      "step": 4975
    },
    {
      "epoch": 0.20473762872878407,
      "grad_norm": 0.17908118665218353,
      "learning_rate": 7.968897460096017e-05,
      "loss": 0.1124,
      "step": 5000
    },
    {
      "epoch": 0.205761316872428,
      "grad_norm": 0.35271450877189636,
      "learning_rate": 7.958639366460137e-05,
      "loss": 0.1101,
      "step": 5025
    },
    {
      "epoch": 0.20678500501607192,
      "grad_norm": 0.2770853340625763,
      "learning_rate": 7.948381272824259e-05,
      "loss": 0.1119,
      "step": 5050
    },
    {
      "epoch": 0.2078086931597158,
      "grad_norm": 0.3154667317867279,
      "learning_rate": 7.93812317918838e-05,
      "loss": 0.1089,
      "step": 5075
    },
    {
      "epoch": 0.20883238130335974,
      "grad_norm": 0.27350950241088867,
      "learning_rate": 7.927865085552502e-05,
      "loss": 0.1113,
      "step": 5100
    },
    {
      "epoch": 0.20985606944700366,
      "grad_norm": 0.24580037593841553,
      "learning_rate": 7.917606991916623e-05,
      "loss": 0.1112,
      "step": 5125
    },
    {
      "epoch": 0.2108797575906476,
      "grad_norm": 0.23447053134441376,
      "learning_rate": 7.907348898280744e-05,
      "loss": 0.1108,
      "step": 5150
    },
    {
      "epoch": 0.2119034457342915,
      "grad_norm": 0.2380298674106598,
      "learning_rate": 7.897090804644866e-05,
      "loss": 0.1082,
      "step": 5175
    },
    {
      "epoch": 0.21292713387793544,
      "grad_norm": 0.22617502510547638,
      "learning_rate": 7.886832711008987e-05,
      "loss": 0.108,
      "step": 5200
    },
    {
      "epoch": 0.21395082202157933,
      "grad_norm": 0.2923017740249634,
      "learning_rate": 7.876574617373108e-05,
      "loss": 0.1094,
      "step": 5225
    },
    {
      "epoch": 0.21497451016522326,
      "grad_norm": 0.280912846326828,
      "learning_rate": 7.866316523737229e-05,
      "loss": 0.1108,
      "step": 5250
    },
    {
      "epoch": 0.21599819830886718,
      "grad_norm": 0.24020980298519135,
      "learning_rate": 7.85605843010135e-05,
      "loss": 0.1104,
      "step": 5275
    },
    {
      "epoch": 0.2170218864525111,
      "grad_norm": 0.2545349597930908,
      "learning_rate": 7.845800336465472e-05,
      "loss": 0.1105,
      "step": 5300
    },
    {
      "epoch": 0.21804557459615503,
      "grad_norm": 0.22493721544742584,
      "learning_rate": 7.835542242829593e-05,
      "loss": 0.1086,
      "step": 5325
    },
    {
      "epoch": 0.21906926273979896,
      "grad_norm": 0.26803284883499146,
      "learning_rate": 7.825284149193715e-05,
      "loss": 0.107,
      "step": 5350
    },
    {
      "epoch": 0.22009295088344286,
      "grad_norm": 0.22854533791542053,
      "learning_rate": 7.815026055557836e-05,
      "loss": 0.1097,
      "step": 5375
    },
    {
      "epoch": 0.22111663902708678,
      "grad_norm": 0.19401207566261292,
      "learning_rate": 7.804767961921958e-05,
      "loss": 0.1082,
      "step": 5400
    },
    {
      "epoch": 0.2221403271707307,
      "grad_norm": 0.22267797589302063,
      "learning_rate": 7.794509868286078e-05,
      "loss": 0.1107,
      "step": 5425
    },
    {
      "epoch": 0.22316401531437463,
      "grad_norm": 0.19586950540542603,
      "learning_rate": 7.7842517746502e-05,
      "loss": 0.1054,
      "step": 5450
    },
    {
      "epoch": 0.22418770345801856,
      "grad_norm": 0.23129217326641083,
      "learning_rate": 7.773993681014321e-05,
      "loss": 0.1093,
      "step": 5475
    },
    {
      "epoch": 0.22521139160166248,
      "grad_norm": 0.26472529768943787,
      "learning_rate": 7.763735587378442e-05,
      "loss": 0.1052,
      "step": 5500
    },
    {
      "epoch": 0.22623507974530638,
      "grad_norm": 0.22230687737464905,
      "learning_rate": 7.753477493742564e-05,
      "loss": 0.1093,
      "step": 5525
    },
    {
      "epoch": 0.2272587678889503,
      "grad_norm": 0.3101346492767334,
      "learning_rate": 7.743219400106685e-05,
      "loss": 0.1036,
      "step": 5550
    },
    {
      "epoch": 0.22828245603259423,
      "grad_norm": 0.18460065126419067,
      "learning_rate": 7.732961306470807e-05,
      "loss": 0.1108,
      "step": 5575
    },
    {
      "epoch": 0.22930614417623815,
      "grad_norm": 0.20973823964595795,
      "learning_rate": 7.722703212834928e-05,
      "loss": 0.1096,
      "step": 5600
    },
    {
      "epoch": 0.23032983231988208,
      "grad_norm": 0.277650386095047,
      "learning_rate": 7.712445119199048e-05,
      "loss": 0.1065,
      "step": 5625
    },
    {
      "epoch": 0.231353520463526,
      "grad_norm": 0.22262975573539734,
      "learning_rate": 7.70218702556317e-05,
      "loss": 0.1103,
      "step": 5650
    },
    {
      "epoch": 0.2323772086071699,
      "grad_norm": 0.24553848803043365,
      "learning_rate": 7.691928931927291e-05,
      "loss": 0.1111,
      "step": 5675
    },
    {
      "epoch": 0.23340089675081382,
      "grad_norm": 0.30652496218681335,
      "learning_rate": 7.681670838291413e-05,
      "loss": 0.1066,
      "step": 5700
    },
    {
      "epoch": 0.23442458489445775,
      "grad_norm": 0.17171849310398102,
      "learning_rate": 7.671412744655534e-05,
      "loss": 0.1074,
      "step": 5725
    },
    {
      "epoch": 0.23544827303810167,
      "grad_norm": 0.27997660636901855,
      "learning_rate": 7.661154651019656e-05,
      "loss": 0.1057,
      "step": 5750
    },
    {
      "epoch": 0.2364719611817456,
      "grad_norm": 0.302190899848938,
      "learning_rate": 7.650896557383777e-05,
      "loss": 0.1078,
      "step": 5775
    },
    {
      "epoch": 0.23749564932538952,
      "grad_norm": 0.29618439078330994,
      "learning_rate": 7.640638463747897e-05,
      "loss": 0.1078,
      "step": 5800
    },
    {
      "epoch": 0.23851933746903342,
      "grad_norm": 0.25362005829811096,
      "learning_rate": 7.630380370112019e-05,
      "loss": 0.1052,
      "step": 5825
    },
    {
      "epoch": 0.23954302561267735,
      "grad_norm": 0.22422952950000763,
      "learning_rate": 7.62012227647614e-05,
      "loss": 0.1069,
      "step": 5850
    },
    {
      "epoch": 0.24056671375632127,
      "grad_norm": 0.21477550268173218,
      "learning_rate": 7.609864182840262e-05,
      "loss": 0.1079,
      "step": 5875
    },
    {
      "epoch": 0.2415904018999652,
      "grad_norm": 0.17787286639213562,
      "learning_rate": 7.599606089204383e-05,
      "loss": 0.1087,
      "step": 5900
    },
    {
      "epoch": 0.24261409004360912,
      "grad_norm": 0.25852805376052856,
      "learning_rate": 7.589347995568505e-05,
      "loss": 0.1055,
      "step": 5925
    },
    {
      "epoch": 0.24363777818725305,
      "grad_norm": 0.2465522438287735,
      "learning_rate": 7.579089901932626e-05,
      "loss": 0.1052,
      "step": 5950
    },
    {
      "epoch": 0.24466146633089694,
      "grad_norm": 0.20638887584209442,
      "learning_rate": 7.568831808296747e-05,
      "loss": 0.1059,
      "step": 5975
    },
    {
      "epoch": 0.24568515447454087,
      "grad_norm": 0.24599237740039825,
      "learning_rate": 7.558573714660868e-05,
      "loss": 0.1052,
      "step": 6000
    },
    {
      "epoch": 0.2467088426181848,
      "grad_norm": 0.2663975954055786,
      "learning_rate": 7.548315621024989e-05,
      "loss": 0.1051,
      "step": 6025
    },
    {
      "epoch": 0.24773253076182872,
      "grad_norm": 0.2528514266014099,
      "learning_rate": 7.53805752738911e-05,
      "loss": 0.108,
      "step": 6050
    },
    {
      "epoch": 0.24875621890547264,
      "grad_norm": 0.23383919894695282,
      "learning_rate": 7.527799433753232e-05,
      "loss": 0.108,
      "step": 6075
    },
    {
      "epoch": 0.24977990704911657,
      "grad_norm": 0.23460572957992554,
      "learning_rate": 7.517541340117353e-05,
      "loss": 0.1052,
      "step": 6100
    },
    {
      "epoch": 0.25080359519276046,
      "grad_norm": 0.23296788334846497,
      "learning_rate": 7.507283246481475e-05,
      "loss": 0.1045,
      "step": 6125
    },
    {
      "epoch": 0.2518272833364044,
      "grad_norm": 0.2544507682323456,
      "learning_rate": 7.497025152845596e-05,
      "loss": 0.1073,
      "step": 6150
    },
    {
      "epoch": 0.2528509714800483,
      "grad_norm": 0.33089134097099304,
      "learning_rate": 7.486767059209718e-05,
      "loss": 0.1047,
      "step": 6175
    },
    {
      "epoch": 0.2538746596236922,
      "grad_norm": 0.2965986132621765,
      "learning_rate": 7.476508965573838e-05,
      "loss": 0.1094,
      "step": 6200
    },
    {
      "epoch": 0.25489834776733616,
      "grad_norm": 0.2606011927127838,
      "learning_rate": 7.46625087193796e-05,
      "loss": 0.1035,
      "step": 6225
    },
    {
      "epoch": 0.25592203591098006,
      "grad_norm": 0.21870043873786926,
      "learning_rate": 7.455992778302081e-05,
      "loss": 0.1081,
      "step": 6250
    },
    {
      "epoch": 0.256945724054624,
      "grad_norm": 0.37876567244529724,
      "learning_rate": 7.445734684666202e-05,
      "loss": 0.1049,
      "step": 6275
    },
    {
      "epoch": 0.2579694121982679,
      "grad_norm": 0.26862943172454834,
      "learning_rate": 7.435476591030324e-05,
      "loss": 0.0993,
      "step": 6300
    },
    {
      "epoch": 0.25899310034191186,
      "grad_norm": 0.23476149141788483,
      "learning_rate": 7.425218497394445e-05,
      "loss": 0.1059,
      "step": 6325
    },
    {
      "epoch": 0.26001678848555576,
      "grad_norm": 0.21397703886032104,
      "learning_rate": 7.414960403758567e-05,
      "loss": 0.1068,
      "step": 6350
    },
    {
      "epoch": 0.26104047662919966,
      "grad_norm": 0.18096783757209778,
      "learning_rate": 7.404702310122688e-05,
      "loss": 0.1072,
      "step": 6375
    },
    {
      "epoch": 0.2620641647728436,
      "grad_norm": 0.2302347868680954,
      "learning_rate": 7.394444216486808e-05,
      "loss": 0.1106,
      "step": 6400
    },
    {
      "epoch": 0.2630878529164875,
      "grad_norm": 0.23029176890850067,
      "learning_rate": 7.38418612285093e-05,
      "loss": 0.1064,
      "step": 6425
    },
    {
      "epoch": 0.26411154106013146,
      "grad_norm": 0.22477678954601288,
      "learning_rate": 7.373928029215051e-05,
      "loss": 0.1066,
      "step": 6450
    },
    {
      "epoch": 0.26513522920377536,
      "grad_norm": 0.30752694606781006,
      "learning_rate": 7.363669935579173e-05,
      "loss": 0.1072,
      "step": 6475
    },
    {
      "epoch": 0.26615891734741925,
      "grad_norm": 0.21718832850456238,
      "learning_rate": 7.353411841943294e-05,
      "loss": 0.1077,
      "step": 6500
    },
    {
      "epoch": 0.2671826054910632,
      "grad_norm": 0.24620802700519562,
      "learning_rate": 7.343153748307416e-05,
      "loss": 0.1053,
      "step": 6525
    },
    {
      "epoch": 0.2682062936347071,
      "grad_norm": 0.1965140402317047,
      "learning_rate": 7.332895654671537e-05,
      "loss": 0.1057,
      "step": 6550
    },
    {
      "epoch": 0.26922998177835106,
      "grad_norm": 0.25057727098464966,
      "learning_rate": 7.322637561035657e-05,
      "loss": 0.1037,
      "step": 6575
    },
    {
      "epoch": 0.27025366992199495,
      "grad_norm": 0.2844404876232147,
      "learning_rate": 7.312379467399779e-05,
      "loss": 0.1026,
      "step": 6600
    },
    {
      "epoch": 0.2712773580656389,
      "grad_norm": 0.23390497267246246,
      "learning_rate": 7.3021213737639e-05,
      "loss": 0.1032,
      "step": 6625
    },
    {
      "epoch": 0.2723010462092828,
      "grad_norm": 0.19829843938350677,
      "learning_rate": 7.291863280128022e-05,
      "loss": 0.1091,
      "step": 6650
    },
    {
      "epoch": 0.2733247343529267,
      "grad_norm": 0.24273422360420227,
      "learning_rate": 7.281605186492143e-05,
      "loss": 0.1075,
      "step": 6675
    },
    {
      "epoch": 0.27434842249657065,
      "grad_norm": 0.3134569823741913,
      "learning_rate": 7.271347092856265e-05,
      "loss": 0.103,
      "step": 6700
    },
    {
      "epoch": 0.27537211064021455,
      "grad_norm": 0.18153002858161926,
      "learning_rate": 7.261088999220386e-05,
      "loss": 0.1055,
      "step": 6725
    },
    {
      "epoch": 0.2763957987838585,
      "grad_norm": 0.22859077155590057,
      "learning_rate": 7.250830905584507e-05,
      "loss": 0.1082,
      "step": 6750
    },
    {
      "epoch": 0.2774194869275024,
      "grad_norm": 0.2673007845878601,
      "learning_rate": 7.240572811948628e-05,
      "loss": 0.1045,
      "step": 6775
    },
    {
      "epoch": 0.2784431750711463,
      "grad_norm": 0.2651185691356659,
      "learning_rate": 7.230314718312749e-05,
      "loss": 0.1033,
      "step": 6800
    },
    {
      "epoch": 0.27946686321479025,
      "grad_norm": 0.2199607491493225,
      "learning_rate": 7.22005662467687e-05,
      "loss": 0.1056,
      "step": 6825
    },
    {
      "epoch": 0.28049055135843415,
      "grad_norm": 0.2549345791339874,
      "learning_rate": 7.209798531040992e-05,
      "loss": 0.1053,
      "step": 6850
    },
    {
      "epoch": 0.2815142395020781,
      "grad_norm": 0.22934679687023163,
      "learning_rate": 7.199540437405113e-05,
      "loss": 0.1065,
      "step": 6875
    },
    {
      "epoch": 0.282537927645722,
      "grad_norm": 0.2626487910747528,
      "learning_rate": 7.189282343769235e-05,
      "loss": 0.1034,
      "step": 6900
    },
    {
      "epoch": 0.28356161578936595,
      "grad_norm": 0.2974385917186737,
      "learning_rate": 7.179024250133356e-05,
      "loss": 0.1046,
      "step": 6925
    },
    {
      "epoch": 0.28458530393300985,
      "grad_norm": 0.2448814958333969,
      "learning_rate": 7.168766156497478e-05,
      "loss": 0.1067,
      "step": 6950
    },
    {
      "epoch": 0.28560899207665374,
      "grad_norm": 0.39903128147125244,
      "learning_rate": 7.158508062861598e-05,
      "loss": 0.1013,
      "step": 6975
    },
    {
      "epoch": 0.2866326802202977,
      "grad_norm": 0.25461485981941223,
      "learning_rate": 7.14824996922572e-05,
      "loss": 0.1051,
      "step": 7000
    },
    {
      "epoch": 0.2876563683639416,
      "grad_norm": 0.22692956030368805,
      "learning_rate": 7.137991875589841e-05,
      "loss": 0.1051,
      "step": 7025
    },
    {
      "epoch": 0.28868005650758555,
      "grad_norm": 0.18912681937217712,
      "learning_rate": 7.127733781953962e-05,
      "loss": 0.1049,
      "step": 7050
    },
    {
      "epoch": 0.28970374465122944,
      "grad_norm": 0.29922547936439514,
      "learning_rate": 7.117475688318084e-05,
      "loss": 0.1028,
      "step": 7075
    },
    {
      "epoch": 0.2907274327948734,
      "grad_norm": 0.39868420362472534,
      "learning_rate": 7.107217594682205e-05,
      "loss": 0.1046,
      "step": 7100
    },
    {
      "epoch": 0.2917511209385173,
      "grad_norm": 0.2455105036497116,
      "learning_rate": 7.096959501046327e-05,
      "loss": 0.108,
      "step": 7125
    },
    {
      "epoch": 0.2927748090821612,
      "grad_norm": 0.22028543055057526,
      "learning_rate": 7.086701407410448e-05,
      "loss": 0.1031,
      "step": 7150
    },
    {
      "epoch": 0.29379849722580514,
      "grad_norm": 0.27611467242240906,
      "learning_rate": 7.076443313774568e-05,
      "loss": 0.1073,
      "step": 7175
    },
    {
      "epoch": 0.29482218536944904,
      "grad_norm": 0.31651851534843445,
      "learning_rate": 7.06618522013869e-05,
      "loss": 0.1005,
      "step": 7200
    },
    {
      "epoch": 0.295845873513093,
      "grad_norm": 0.2306353896856308,
      "learning_rate": 7.055927126502811e-05,
      "loss": 0.1035,
      "step": 7225
    },
    {
      "epoch": 0.2968695616567369,
      "grad_norm": 0.22398217022418976,
      "learning_rate": 7.045669032866933e-05,
      "loss": 0.1059,
      "step": 7250
    },
    {
      "epoch": 0.2978932498003808,
      "grad_norm": 0.24632596969604492,
      "learning_rate": 7.035410939231054e-05,
      "loss": 0.106,
      "step": 7275
    },
    {
      "epoch": 0.29891693794402474,
      "grad_norm": 0.21331587433815002,
      "learning_rate": 7.025152845595176e-05,
      "loss": 0.0994,
      "step": 7300
    },
    {
      "epoch": 0.29994062608766864,
      "grad_norm": 0.37877365946769714,
      "learning_rate": 7.014894751959297e-05,
      "loss": 0.102,
      "step": 7325
    },
    {
      "epoch": 0.3009643142313126,
      "grad_norm": 0.28108686208724976,
      "learning_rate": 7.004636658323419e-05,
      "loss": 0.104,
      "step": 7350
    },
    {
      "epoch": 0.3019880023749565,
      "grad_norm": 0.25342661142349243,
      "learning_rate": 6.994378564687539e-05,
      "loss": 0.1044,
      "step": 7375
    },
    {
      "epoch": 0.30301169051860044,
      "grad_norm": 0.7590738534927368,
      "learning_rate": 6.98412047105166e-05,
      "loss": 0.106,
      "step": 7400
    },
    {
      "epoch": 0.30403537866224434,
      "grad_norm": 0.20050746202468872,
      "learning_rate": 6.973862377415782e-05,
      "loss": 0.1069,
      "step": 7425
    },
    {
      "epoch": 0.30505906680588823,
      "grad_norm": 0.27144044637680054,
      "learning_rate": 6.963604283779903e-05,
      "loss": 0.104,
      "step": 7450
    },
    {
      "epoch": 0.3060827549495322,
      "grad_norm": 0.2616618275642395,
      "learning_rate": 6.953346190144025e-05,
      "loss": 0.101,
      "step": 7475
    },
    {
      "epoch": 0.3071064430931761,
      "grad_norm": 0.27171334624290466,
      "learning_rate": 6.943088096508146e-05,
      "loss": 0.1036,
      "step": 7500
    },
    {
      "epoch": 0.30813013123682004,
      "grad_norm": 0.19246098399162292,
      "learning_rate": 6.932830002872268e-05,
      "loss": 0.1035,
      "step": 7525
    },
    {
      "epoch": 0.30915381938046393,
      "grad_norm": 0.2488516867160797,
      "learning_rate": 6.922571909236388e-05,
      "loss": 0.1053,
      "step": 7550
    },
    {
      "epoch": 0.31017750752410783,
      "grad_norm": 0.2559676170349121,
      "learning_rate": 6.912313815600509e-05,
      "loss": 0.1039,
      "step": 7575
    },
    {
      "epoch": 0.3112011956677518,
      "grad_norm": 0.19615231454372406,
      "learning_rate": 6.90205572196463e-05,
      "loss": 0.1016,
      "step": 7600
    },
    {
      "epoch": 0.3122248838113957,
      "grad_norm": 0.22992445528507233,
      "learning_rate": 6.891797628328752e-05,
      "loss": 0.103,
      "step": 7625
    },
    {
      "epoch": 0.31324857195503963,
      "grad_norm": 0.25916945934295654,
      "learning_rate": 6.881539534692874e-05,
      "loss": 0.1033,
      "step": 7650
    },
    {
      "epoch": 0.31427226009868353,
      "grad_norm": 0.2485833466053009,
      "learning_rate": 6.871281441056995e-05,
      "loss": 0.1023,
      "step": 7675
    },
    {
      "epoch": 0.3152959482423275,
      "grad_norm": 0.3130246102809906,
      "learning_rate": 6.861023347421116e-05,
      "loss": 0.1013,
      "step": 7700
    },
    {
      "epoch": 0.3163196363859714,
      "grad_norm": 0.17889827489852905,
      "learning_rate": 6.850765253785238e-05,
      "loss": 0.1063,
      "step": 7725
    },
    {
      "epoch": 0.3173433245296153,
      "grad_norm": 0.23844337463378906,
      "learning_rate": 6.840507160149358e-05,
      "loss": 0.1024,
      "step": 7750
    },
    {
      "epoch": 0.31836701267325923,
      "grad_norm": 0.2489156275987625,
      "learning_rate": 6.83024906651348e-05,
      "loss": 0.1018,
      "step": 7775
    },
    {
      "epoch": 0.31939070081690313,
      "grad_norm": 0.24830876290798187,
      "learning_rate": 6.819990972877601e-05,
      "loss": 0.1018,
      "step": 7800
    },
    {
      "epoch": 0.3204143889605471,
      "grad_norm": 0.23647700250148773,
      "learning_rate": 6.809732879241722e-05,
      "loss": 0.1054,
      "step": 7825
    },
    {
      "epoch": 0.321438077104191,
      "grad_norm": 0.3480120003223419,
      "learning_rate": 6.799474785605844e-05,
      "loss": 0.0991,
      "step": 7850
    },
    {
      "epoch": 0.3224617652478349,
      "grad_norm": 0.2117711305618286,
      "learning_rate": 6.789216691969965e-05,
      "loss": 0.1019,
      "step": 7875
    },
    {
      "epoch": 0.3234854533914788,
      "grad_norm": 0.21510981023311615,
      "learning_rate": 6.778958598334087e-05,
      "loss": 0.1023,
      "step": 7900
    },
    {
      "epoch": 0.3245091415351227,
      "grad_norm": 0.21288833022117615,
      "learning_rate": 6.768700504698208e-05,
      "loss": 0.1018,
      "step": 7925
    },
    {
      "epoch": 0.3255328296787667,
      "grad_norm": 0.2654208242893219,
      "learning_rate": 6.758442411062328e-05,
      "loss": 0.1,
      "step": 7950
    },
    {
      "epoch": 0.3265565178224106,
      "grad_norm": 0.23810634016990662,
      "learning_rate": 6.74818431742645e-05,
      "loss": 0.1007,
      "step": 7975
    },
    {
      "epoch": 0.3275802059660545,
      "grad_norm": 0.26225727796554565,
      "learning_rate": 6.737926223790571e-05,
      "loss": 0.1037,
      "step": 8000
    },
    {
      "epoch": 0.3286038941096984,
      "grad_norm": 0.28832173347473145,
      "learning_rate": 6.727668130154693e-05,
      "loss": 0.1024,
      "step": 8025
    },
    {
      "epoch": 0.3296275822533423,
      "grad_norm": 0.25963491201400757,
      "learning_rate": 6.717410036518814e-05,
      "loss": 0.106,
      "step": 8050
    },
    {
      "epoch": 0.3306512703969863,
      "grad_norm": 0.3249678611755371,
      "learning_rate": 6.707151942882936e-05,
      "loss": 0.0958,
      "step": 8075
    },
    {
      "epoch": 0.33167495854063017,
      "grad_norm": 0.25855204463005066,
      "learning_rate": 6.696893849247057e-05,
      "loss": 0.1004,
      "step": 8100
    },
    {
      "epoch": 0.3326986466842741,
      "grad_norm": 0.2253751903772354,
      "learning_rate": 6.686635755611179e-05,
      "loss": 0.1013,
      "step": 8125
    },
    {
      "epoch": 0.333722334827918,
      "grad_norm": 0.25214654207229614,
      "learning_rate": 6.676377661975299e-05,
      "loss": 0.1016,
      "step": 8150
    },
    {
      "epoch": 0.3347460229715619,
      "grad_norm": 0.2561601996421814,
      "learning_rate": 6.66611956833942e-05,
      "loss": 0.1011,
      "step": 8175
    },
    {
      "epoch": 0.33576971111520587,
      "grad_norm": 0.2241383194923401,
      "learning_rate": 6.655861474703542e-05,
      "loss": 0.1013,
      "step": 8200
    },
    {
      "epoch": 0.33679339925884977,
      "grad_norm": 0.23701010644435883,
      "learning_rate": 6.645603381067663e-05,
      "loss": 0.0994,
      "step": 8225
    },
    {
      "epoch": 0.3378170874024937,
      "grad_norm": 0.2312152236700058,
      "learning_rate": 6.635345287431785e-05,
      "loss": 0.0969,
      "step": 8250
    },
    {
      "epoch": 0.3388407755461376,
      "grad_norm": 0.5713122487068176,
      "learning_rate": 6.625087193795906e-05,
      "loss": 0.1032,
      "step": 8275
    },
    {
      "epoch": 0.33986446368978157,
      "grad_norm": 0.2621745467185974,
      "learning_rate": 6.614829100160028e-05,
      "loss": 0.1009,
      "step": 8300
    },
    {
      "epoch": 0.34088815183342547,
      "grad_norm": 0.24803993105888367,
      "learning_rate": 6.604571006524149e-05,
      "loss": 0.0992,
      "step": 8325
    },
    {
      "epoch": 0.34191183997706937,
      "grad_norm": 0.20469900965690613,
      "learning_rate": 6.594312912888269e-05,
      "loss": 0.1021,
      "step": 8350
    },
    {
      "epoch": 0.3429355281207133,
      "grad_norm": 0.26485446095466614,
      "learning_rate": 6.58405481925239e-05,
      "loss": 0.1023,
      "step": 8375
    },
    {
      "epoch": 0.3439592162643572,
      "grad_norm": 0.30211177468299866,
      "learning_rate": 6.573796725616512e-05,
      "loss": 0.1,
      "step": 8400
    },
    {
      "epoch": 0.34498290440800117,
      "grad_norm": 0.19773200154304504,
| "learning_rate": 6.563538631980634e-05, | |
| "loss": 0.0998, | |
| "step": 8425 | |
| }, | |
| { | |
| "epoch": 0.34600659255164506, | |
| "grad_norm": 0.37499427795410156, | |
| "learning_rate": 6.553280538344755e-05, | |
| "loss": 0.0969, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 0.34703028069528896, | |
| "grad_norm": 0.23352007567882538, | |
| "learning_rate": 6.543022444708877e-05, | |
| "loss": 0.1013, | |
| "step": 8475 | |
| }, | |
| { | |
| "epoch": 0.3480539688389329, | |
| "grad_norm": 0.22725583612918854, | |
| "learning_rate": 6.532764351072998e-05, | |
| "loss": 0.1005, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.3490776569825768, | |
| "grad_norm": 0.2472585290670395, | |
| "learning_rate": 6.522506257437118e-05, | |
| "loss": 0.0981, | |
| "step": 8525 | |
| }, | |
| { | |
| "epoch": 0.35010134512622076, | |
| "grad_norm": 0.24253399670124054, | |
| "learning_rate": 6.51224816380124e-05, | |
| "loss": 0.1029, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 0.35112503326986466, | |
| "grad_norm": 0.22759589552879333, | |
| "learning_rate": 6.501990070165361e-05, | |
| "loss": 0.1026, | |
| "step": 8575 | |
| }, | |
| { | |
| "epoch": 0.3521487214135086, | |
| "grad_norm": 0.3092879056930542, | |
| "learning_rate": 6.491731976529482e-05, | |
| "loss": 0.105, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.3531724095571525, | |
| "grad_norm": 0.21212832629680634, | |
| "learning_rate": 6.481473882893604e-05, | |
| "loss": 0.1039, | |
| "step": 8625 | |
| }, | |
| { | |
| "epoch": 0.3541960977007964, | |
| "grad_norm": 0.22957822680473328, | |
| "learning_rate": 6.471215789257725e-05, | |
| "loss": 0.1039, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 0.35521978584444036, | |
| "grad_norm": 0.2514593005180359, | |
| "learning_rate": 6.460957695621847e-05, | |
| "loss": 0.105, | |
| "step": 8675 | |
| }, | |
| { | |
| "epoch": 0.35624347398808426, | |
| "grad_norm": 0.32485923171043396, | |
| "learning_rate": 6.450699601985968e-05, | |
| "loss": 0.1043, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.3572671621317282, | |
| "grad_norm": 0.25438931584358215, | |
| "learning_rate": 6.440441508350088e-05, | |
| "loss": 0.1033, | |
| "step": 8725 | |
| }, | |
| { | |
| "epoch": 0.3582908502753721, | |
| "grad_norm": 0.26107901334762573, | |
| "learning_rate": 6.43018341471421e-05, | |
| "loss": 0.106, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 0.359314538419016, | |
| "grad_norm": 0.20148183405399323, | |
| "learning_rate": 6.41992532107833e-05, | |
| "loss": 0.102, | |
| "step": 8775 | |
| }, | |
| { | |
| "epoch": 0.36033822656265996, | |
| "grad_norm": 0.3115244209766388, | |
| "learning_rate": 6.409667227442452e-05, | |
| "loss": 0.1002, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.36136191470630386, | |
| "grad_norm": 0.2722707688808441, | |
| "learning_rate": 6.399409133806573e-05, | |
| "loss": 0.0993, | |
| "step": 8825 | |
| }, | |
| { | |
| "epoch": 0.3623856028499478, | |
| "grad_norm": 0.3244341015815735, | |
| "learning_rate": 6.389151040170694e-05, | |
| "loss": 0.0973, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 0.3634092909935917, | |
| "grad_norm": 0.24697239696979523, | |
| "learning_rate": 6.378892946534816e-05, | |
| "loss": 0.0958, | |
| "step": 8875 | |
| }, | |
| { | |
| "epoch": 0.36443297913723566, | |
| "grad_norm": 0.23170702159404755, | |
| "learning_rate": 6.368634852898937e-05, | |
| "loss": 0.1039, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.36545666728087955, | |
| "grad_norm": 0.25722336769104004, | |
| "learning_rate": 6.358376759263059e-05, | |
| "loss": 0.1027, | |
| "step": 8925 | |
| }, | |
| { | |
| "epoch": 0.36648035542452345, | |
| "grad_norm": 0.2329777032136917, | |
| "learning_rate": 6.348118665627179e-05, | |
| "loss": 0.1003, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 0.3675040435681674, | |
| "grad_norm": 0.3008142411708832, | |
| "learning_rate": 6.3378605719913e-05, | |
| "loss": 0.0975, | |
| "step": 8975 | |
| }, | |
| { | |
| "epoch": 0.3685277317118113, | |
| "grad_norm": 0.19098886847496033, | |
| "learning_rate": 6.327602478355422e-05, | |
| "loss": 0.097, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.36955141985545525, | |
| "grad_norm": 0.2393869310617447, | |
| "learning_rate": 6.317344384719543e-05, | |
| "loss": 0.0992, | |
| "step": 9025 | |
| }, | |
| { | |
| "epoch": 0.37057510799909915, | |
| "grad_norm": 0.24962279200553894, | |
| "learning_rate": 6.307086291083665e-05, | |
| "loss": 0.1006, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 0.37159879614274305, | |
| "grad_norm": 0.20281440019607544, | |
| "learning_rate": 6.296828197447786e-05, | |
| "loss": 0.097, | |
| "step": 9075 | |
| }, | |
| { | |
| "epoch": 0.372622484286387, | |
| "grad_norm": 0.21669328212738037, | |
| "learning_rate": 6.286570103811908e-05, | |
| "loss": 0.1008, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.3736461724300309, | |
| "grad_norm": 0.21775703132152557, | |
| "learning_rate": 6.276312010176029e-05, | |
| "loss": 0.1046, | |
| "step": 9125 | |
| }, | |
| { | |
| "epoch": 0.37466986057367485, | |
| "grad_norm": 0.24492838978767395, | |
| "learning_rate": 6.26605391654015e-05, | |
| "loss": 0.0989, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 0.37569354871731875, | |
| "grad_norm": 0.2119276523590088, | |
| "learning_rate": 6.255795822904271e-05, | |
| "loss": 0.1038, | |
| "step": 9175 | |
| }, | |
| { | |
| "epoch": 0.3767172368609627, | |
| "grad_norm": 0.2842216193675995, | |
| "learning_rate": 6.245537729268392e-05, | |
| "loss": 0.1004, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.3777409250046066, | |
| "grad_norm": 0.2775871455669403, | |
| "learning_rate": 6.235279635632514e-05, | |
| "loss": 0.1008, | |
| "step": 9225 | |
| }, | |
| { | |
| "epoch": 0.3787646131482505, | |
| "grad_norm": 0.26387348771095276, | |
| "learning_rate": 6.225021541996635e-05, | |
| "loss": 0.0972, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 0.37978830129189445, | |
| "grad_norm": 0.2945527136325836, | |
| "learning_rate": 6.214763448360757e-05, | |
| "loss": 0.1044, | |
| "step": 9275 | |
| }, | |
| { | |
| "epoch": 0.38081198943553835, | |
| "grad_norm": 0.34967219829559326, | |
| "learning_rate": 6.204505354724878e-05, | |
| "loss": 0.1018, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.3818356775791823, | |
| "grad_norm": 0.2373281568288803, | |
| "learning_rate": 6.194247261089e-05, | |
| "loss": 0.1028, | |
| "step": 9325 | |
| }, | |
| { | |
| "epoch": 0.3828593657228262, | |
| "grad_norm": 0.27347394824028015, | |
| "learning_rate": 6.18398916745312e-05, | |
| "loss": 0.0995, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 0.3838830538664701, | |
| "grad_norm": 0.2860616147518158, | |
| "learning_rate": 6.173731073817241e-05, | |
| "loss": 0.0983, | |
| "step": 9375 | |
| }, | |
| { | |
| "epoch": 0.38490674201011404, | |
| "grad_norm": 0.3643983006477356, | |
| "learning_rate": 6.163472980181363e-05, | |
| "loss": 0.0957, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.38593043015375794, | |
| "grad_norm": 0.3181641399860382, | |
| "learning_rate": 6.153214886545484e-05, | |
| "loss": 0.0989, | |
| "step": 9425 | |
| }, | |
| { | |
| "epoch": 0.3869541182974019, | |
| "grad_norm": 0.24089764058589935, | |
| "learning_rate": 6.142956792909606e-05, | |
| "loss": 0.0982, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 0.3879778064410458, | |
| "grad_norm": 0.2490035593509674, | |
| "learning_rate": 6.132698699273727e-05, | |
| "loss": 0.1039, | |
| "step": 9475 | |
| }, | |
| { | |
| "epoch": 0.38900149458468974, | |
| "grad_norm": 0.2765063941478729, | |
| "learning_rate": 6.122440605637849e-05, | |
| "loss": 0.0937, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.39002518272833364, | |
| "grad_norm": 0.45849937200546265, | |
| "learning_rate": 6.11218251200197e-05, | |
| "loss": 0.1002, | |
| "step": 9525 | |
| }, | |
| { | |
| "epoch": 0.39104887087197754, | |
| "grad_norm": 0.23391731083393097, | |
| "learning_rate": 6.101924418366091e-05, | |
| "loss": 0.0995, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 0.3920725590156215, | |
| "grad_norm": 0.258109986782074, | |
| "learning_rate": 6.091666324730212e-05, | |
| "loss": 0.1032, | |
| "step": 9575 | |
| }, | |
| { | |
| "epoch": 0.3930962471592654, | |
| "grad_norm": 0.2020760029554367, | |
| "learning_rate": 6.081408231094333e-05, | |
| "loss": 0.1012, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.39411993530290934, | |
| "grad_norm": 0.20322605967521667, | |
| "learning_rate": 6.0711501374584545e-05, | |
| "loss": 0.1009, | |
| "step": 9625 | |
| }, | |
| { | |
| "epoch": 0.39514362344655324, | |
| "grad_norm": 0.3139131963253021, | |
| "learning_rate": 6.060892043822576e-05, | |
| "loss": 0.1009, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 0.39616731159019714, | |
| "grad_norm": 0.2019822746515274, | |
| "learning_rate": 6.0506339501866974e-05, | |
| "loss": 0.1021, | |
| "step": 9675 | |
| }, | |
| { | |
| "epoch": 0.3971909997338411, | |
| "grad_norm": 0.21363505721092224, | |
| "learning_rate": 6.040375856550818e-05, | |
| "loss": 0.0996, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.398214687877485, | |
| "grad_norm": 0.25607529282569885, | |
| "learning_rate": 6.03011776291494e-05, | |
| "loss": 0.0968, | |
| "step": 9725 | |
| }, | |
| { | |
| "epoch": 0.39923837602112894, | |
| "grad_norm": 0.28837454319000244, | |
| "learning_rate": 6.019859669279061e-05, | |
| "loss": 0.1003, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 0.40026206416477284, | |
| "grad_norm": 0.22750523686408997, | |
| "learning_rate": 6.009601575643182e-05, | |
| "loss": 0.0976, | |
| "step": 9775 | |
| }, | |
| { | |
| "epoch": 0.4012857523084168, | |
| "grad_norm": 0.2659379541873932, | |
| "learning_rate": 5.9993434820073034e-05, | |
| "loss": 0.0991, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.4023094404520607, | |
| "grad_norm": 0.3965132534503937, | |
| "learning_rate": 5.989085388371425e-05, | |
| "loss": 0.1039, | |
| "step": 9825 | |
| }, | |
| { | |
| "epoch": 0.4033331285957046, | |
| "grad_norm": 0.2643307149410248, | |
| "learning_rate": 5.978827294735546e-05, | |
| "loss": 0.1018, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 0.40435681673934853, | |
| "grad_norm": 0.2745136618614197, | |
| "learning_rate": 5.968569201099667e-05, | |
| "loss": 0.1037, | |
| "step": 9875 | |
| }, | |
| { | |
| "epoch": 0.40538050488299243, | |
| "grad_norm": 0.235930934548378, | |
| "learning_rate": 5.9583111074637886e-05, | |
| "loss": 0.0995, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.4064041930266364, | |
| "grad_norm": 0.23560784757137299, | |
| "learning_rate": 5.94805301382791e-05, | |
| "loss": 0.1001, | |
| "step": 9925 | |
| }, | |
| { | |
| "epoch": 0.4074278811702803, | |
| "grad_norm": 0.3324751555919647, | |
| "learning_rate": 5.9377949201920315e-05, | |
| "loss": 0.1007, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 0.4084515693139242, | |
| "grad_norm": 0.22333605587482452, | |
| "learning_rate": 5.927536826556152e-05, | |
| "loss": 0.1035, | |
| "step": 9975 | |
| }, | |
| { | |
| "epoch": 0.40947525745756813, | |
| "grad_norm": 0.23905926942825317, | |
| "learning_rate": 5.917278732920274e-05, | |
| "loss": 0.1019, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.41049894560121203, | |
| "grad_norm": 0.24543020129203796, | |
| "learning_rate": 5.907020639284395e-05, | |
| "loss": 0.1005, | |
| "step": 10025 | |
| }, | |
| { | |
| "epoch": 0.411522633744856, | |
| "grad_norm": 0.2597710192203522, | |
| "learning_rate": 5.896762545648517e-05, | |
| "loss": 0.0937, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 0.4125463218884999, | |
| "grad_norm": 0.2141934633255005, | |
| "learning_rate": 5.8865044520126375e-05, | |
| "loss": 0.1047, | |
| "step": 10075 | |
| }, | |
| { | |
| "epoch": 0.41357001003214383, | |
| "grad_norm": 0.18962982296943665, | |
| "learning_rate": 5.876246358376759e-05, | |
| "loss": 0.1019, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.41459369817578773, | |
| "grad_norm": 0.16786764562129974, | |
| "learning_rate": 5.8659882647408804e-05, | |
| "loss": 0.098, | |
| "step": 10125 | |
| }, | |
| { | |
| "epoch": 0.4156173863194316, | |
| "grad_norm": 0.2587350904941559, | |
| "learning_rate": 5.855730171105002e-05, | |
| "loss": 0.1018, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 0.4166410744630756, | |
| "grad_norm": 0.23551388084888458, | |
| "learning_rate": 5.845472077469123e-05, | |
| "loss": 0.1017, | |
| "step": 10175 | |
| }, | |
| { | |
| "epoch": 0.4176647626067195, | |
| "grad_norm": 0.40040743350982666, | |
| "learning_rate": 5.835213983833244e-05, | |
| "loss": 0.099, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.41868845075036343, | |
| "grad_norm": 0.274138480424881, | |
| "learning_rate": 5.8249558901973656e-05, | |
| "loss": 0.0988, | |
| "step": 10225 | |
| }, | |
| { | |
| "epoch": 0.4197121388940073, | |
| "grad_norm": 0.21808317303657532, | |
| "learning_rate": 5.814697796561487e-05, | |
| "loss": 0.0974, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 0.4207358270376513, | |
| "grad_norm": 0.2756749093532562, | |
| "learning_rate": 5.804439702925608e-05, | |
| "loss": 0.1007, | |
| "step": 10275 | |
| }, | |
| { | |
| "epoch": 0.4217595151812952, | |
| "grad_norm": 0.28059181571006775, | |
| "learning_rate": 5.7941816092897293e-05, | |
| "loss": 0.0956, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.42278320332493907, | |
| "grad_norm": 0.2666233479976654, | |
| "learning_rate": 5.783923515653851e-05, | |
| "loss": 0.1014, | |
| "step": 10325 | |
| }, | |
| { | |
| "epoch": 0.423806891468583, | |
| "grad_norm": 0.17817972600460052, | |
| "learning_rate": 5.773665422017972e-05, | |
| "loss": 0.098, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 0.4248305796122269, | |
| "grad_norm": 0.2498740404844284, | |
| "learning_rate": 5.763407328382093e-05, | |
| "loss": 0.1, | |
| "step": 10375 | |
| }, | |
| { | |
| "epoch": 0.4258542677558709, | |
| "grad_norm": 0.2427319437265396, | |
| "learning_rate": 5.7531492347462145e-05, | |
| "loss": 0.0985, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.42687795589951477, | |
| "grad_norm": 0.1904958337545395, | |
| "learning_rate": 5.742891141110336e-05, | |
| "loss": 0.1024, | |
| "step": 10425 | |
| }, | |
| { | |
| "epoch": 0.42790164404315867, | |
| "grad_norm": 0.246423602104187, | |
| "learning_rate": 5.7326330474744575e-05, | |
| "loss": 0.0996, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 0.4289253321868026, | |
| "grad_norm": 0.3124719262123108, | |
| "learning_rate": 5.722374953838578e-05, | |
| "loss": 0.0985, | |
| "step": 10475 | |
| }, | |
| { | |
| "epoch": 0.4299490203304465, | |
| "grad_norm": 0.2046365588903427, | |
| "learning_rate": 5.7121168602027e-05, | |
| "loss": 0.1031, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.43097270847409047, | |
| "grad_norm": 0.22781619429588318, | |
| "learning_rate": 5.701858766566821e-05, | |
| "loss": 0.0963, | |
| "step": 10525 | |
| }, | |
| { | |
| "epoch": 0.43199639661773437, | |
| "grad_norm": 0.28827908635139465, | |
| "learning_rate": 5.6916006729309427e-05, | |
| "loss": 0.1015, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 0.4330200847613783, | |
| "grad_norm": 0.27641138434410095, | |
| "learning_rate": 5.6813425792950634e-05, | |
| "loss": 0.0973, | |
| "step": 10575 | |
| }, | |
| { | |
| "epoch": 0.4340437729050222, | |
| "grad_norm": 0.3545154929161072, | |
| "learning_rate": 5.671084485659185e-05, | |
| "loss": 0.0954, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.4350674610486661, | |
| "grad_norm": 0.28629690408706665, | |
| "learning_rate": 5.6608263920233064e-05, | |
| "loss": 0.0937, | |
| "step": 10625 | |
| }, | |
| { | |
| "epoch": 0.43609114919231007, | |
| "grad_norm": 0.2316763550043106, | |
| "learning_rate": 5.650568298387428e-05, | |
| "loss": 0.0975, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 0.43711483733595397, | |
| "grad_norm": 0.28071022033691406, | |
| "learning_rate": 5.6403102047515486e-05, | |
| "loss": 0.1011, | |
| "step": 10675 | |
| }, | |
| { | |
| "epoch": 0.4381385254795979, | |
| "grad_norm": 0.2439073920249939, | |
| "learning_rate": 5.63005211111567e-05, | |
| "loss": 0.0994, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.4391622136232418, | |
| "grad_norm": 0.2952822744846344, | |
| "learning_rate": 5.6197940174797916e-05, | |
| "loss": 0.0983, | |
| "step": 10725 | |
| }, | |
| { | |
| "epoch": 0.4401859017668857, | |
| "grad_norm": 0.28434520959854126, | |
| "learning_rate": 5.6095359238439124e-05, | |
| "loss": 0.0962, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 0.44120958991052966, | |
| "grad_norm": 0.24351637065410614, | |
| "learning_rate": 5.599277830208034e-05, | |
| "loss": 0.0944, | |
| "step": 10775 | |
| }, | |
| { | |
| "epoch": 0.44223327805417356, | |
| "grad_norm": 0.27679064869880676, | |
| "learning_rate": 5.589019736572155e-05, | |
| "loss": 0.0959, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.4432569661978175, | |
| "grad_norm": 0.3108427822589874, | |
| "learning_rate": 5.578761642936277e-05, | |
| "loss": 0.0941, | |
| "step": 10825 | |
| }, | |
| { | |
| "epoch": 0.4442806543414614, | |
| "grad_norm": 0.19497576355934143, | |
| "learning_rate": 5.5685035493003976e-05, | |
| "loss": 0.0981, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 0.44530434248510536, | |
| "grad_norm": 0.22953549027442932, | |
| "learning_rate": 5.558245455664519e-05, | |
| "loss": 0.0976, | |
| "step": 10875 | |
| }, | |
| { | |
| "epoch": 0.44632803062874926, | |
| "grad_norm": 0.5247841477394104, | |
| "learning_rate": 5.5479873620286405e-05, | |
| "loss": 0.1014, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.44735171877239316, | |
| "grad_norm": 0.20830635726451874, | |
| "learning_rate": 5.537729268392762e-05, | |
| "loss": 0.0946, | |
| "step": 10925 | |
| }, | |
| { | |
| "epoch": 0.4483754069160371, | |
| "grad_norm": 0.24027594923973083, | |
| "learning_rate": 5.527471174756883e-05, | |
| "loss": 0.0953, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 0.449399095059681, | |
| "grad_norm": 0.2040860950946808, | |
| "learning_rate": 5.517213081121004e-05, | |
| "loss": 0.0961, | |
| "step": 10975 | |
| }, | |
| { | |
| "epoch": 0.45042278320332496, | |
| "grad_norm": 0.18586771190166473, | |
| "learning_rate": 5.506954987485126e-05, | |
| "loss": 0.0982, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.45144647134696886, | |
| "grad_norm": 0.32001617550849915, | |
| "learning_rate": 5.496696893849247e-05, | |
| "loss": 0.0981, | |
| "step": 11025 | |
| }, | |
| { | |
| "epoch": 0.45247015949061276, | |
| "grad_norm": 0.28242385387420654, | |
| "learning_rate": 5.486438800213368e-05, | |
| "loss": 0.0981, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 0.4534938476342567, | |
| "grad_norm": 0.3456820547580719, | |
| "learning_rate": 5.4761807065774894e-05, | |
| "loss": 0.0941, | |
| "step": 11075 | |
| }, | |
| { | |
| "epoch": 0.4545175357779006, | |
| "grad_norm": 0.22706662118434906, | |
| "learning_rate": 5.465922612941611e-05, | |
| "loss": 0.0985, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.45554122392154456, | |
| "grad_norm": 0.2593896985054016, | |
| "learning_rate": 5.455664519305732e-05, | |
| "loss": 0.0983, | |
| "step": 11125 | |
| }, | |
| { | |
| "epoch": 0.45656491206518846, | |
| "grad_norm": 0.22201375663280487, | |
| "learning_rate": 5.445406425669853e-05, | |
| "loss": 0.1026, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 0.4575886002088324, | |
| "grad_norm": 0.23291830718517303, | |
| "learning_rate": 5.4351483320339746e-05, | |
| "loss": 0.0995, | |
| "step": 11175 | |
| }, | |
| { | |
| "epoch": 0.4586122883524763, | |
| "grad_norm": 0.24490538239479065, | |
| "learning_rate": 5.424890238398096e-05, | |
| "loss": 0.0953, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.4596359764961202, | |
| "grad_norm": 0.3179132640361786, | |
| "learning_rate": 5.4146321447622175e-05, | |
| "loss": 0.0983, | |
| "step": 11225 | |
| }, | |
| { | |
| "epoch": 0.46065966463976415, | |
| "grad_norm": 0.23889416456222534, | |
| "learning_rate": 5.404374051126338e-05, | |
| "loss": 0.0971, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 0.46168335278340805, | |
| "grad_norm": 0.5037365555763245, | |
| "learning_rate": 5.39411595749046e-05, | |
| "loss": 0.0936, | |
| "step": 11275 | |
| }, | |
| { | |
| "epoch": 0.462707040927052, | |
| "grad_norm": 0.2585156559944153, | |
| "learning_rate": 5.383857863854581e-05, | |
| "loss": 0.0918, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.4637307290706959, | |
| "grad_norm": 0.2691129148006439, | |
| "learning_rate": 5.373599770218703e-05, | |
| "loss": 0.0959, | |
| "step": 11325 | |
| }, | |
| { | |
| "epoch": 0.4647544172143398, | |
| "grad_norm": 0.24569182097911835, | |
| "learning_rate": 5.3633416765828235e-05, | |
| "loss": 0.0966, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 0.46577810535798375, | |
| "grad_norm": 0.3655073344707489, | |
| "learning_rate": 5.353083582946945e-05, | |
| "loss": 0.0951, | |
| "step": 11375 | |
| }, | |
| { | |
| "epoch": 0.46680179350162765, | |
| "grad_norm": 0.24223706126213074, | |
| "learning_rate": 5.3428254893110664e-05, | |
| "loss": 0.1008, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.4678254816452716, | |
| "grad_norm": 0.2586074769496918, | |
| "learning_rate": 5.332567395675188e-05, | |
| "loss": 0.1011, | |
| "step": 11425 | |
| }, | |
| { | |
| "epoch": 0.4688491697889155, | |
| "grad_norm": 0.2603899836540222, | |
| "learning_rate": 5.322309302039309e-05, | |
| "loss": 0.0984, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 0.46987285793255945, | |
| "grad_norm": 0.25967130064964294, | |
| "learning_rate": 5.31205120840343e-05, | |
| "loss": 0.0965, | |
| "step": 11475 | |
| }, | |
| { | |
| "epoch": 0.47089654607620335, | |
| "grad_norm": 0.2673439085483551, | |
| "learning_rate": 5.3017931147675516e-05, | |
| "loss": 0.1025, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.47192023421984725, | |
| "grad_norm": 0.24883116781711578, | |
| "learning_rate": 5.291535021131673e-05, | |
| "loss": 0.0943, | |
| "step": 11525 | |
| }, | |
| { | |
| "epoch": 0.4729439223634912, | |
| "grad_norm": 0.29023605585098267, | |
| "learning_rate": 5.281276927495794e-05, | |
| "loss": 0.0993, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 0.4739676105071351, | |
| "grad_norm": 0.21741856634616852, | |
| "learning_rate": 5.2710188338599153e-05, | |
| "loss": 0.0937, | |
| "step": 11575 | |
| }, | |
| { | |
| "epoch": 0.47499129865077905, | |
| "grad_norm": 0.24658936262130737, | |
| "learning_rate": 5.260760740224037e-05, | |
| "loss": 0.0971, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.47601498679442295, | |
| "grad_norm": 0.3309827148914337, | |
| "learning_rate": 5.2505026465881576e-05, | |
| "loss": 0.1003, | |
| "step": 11625 | |
| }, | |
| { | |
| "epoch": 0.47703867493806684, | |
| "grad_norm": 0.22925116121768951, | |
| "learning_rate": 5.240244552952279e-05, | |
| "loss": 0.0961, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 0.4780623630817108, | |
| "grad_norm": 0.33367425203323364, | |
| "learning_rate": 5.2299864593164005e-05, | |
| "loss": 0.0959, | |
| "step": 11675 | |
| }, | |
| { | |
| "epoch": 0.4790860512253547, | |
| "grad_norm": 0.2225172519683838, | |
| "learning_rate": 5.219728365680522e-05, | |
| "loss": 0.0983, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.48010973936899864, | |
| "grad_norm": 0.26799845695495605, | |
| "learning_rate": 5.209470272044643e-05, | |
| "loss": 0.0984, | |
| "step": 11725 | |
| }, | |
| { | |
| "epoch": 0.48113342751264254, | |
| "grad_norm": 0.28932616114616394, | |
| "learning_rate": 5.199212178408764e-05, | |
| "loss": 0.0973, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 0.4821571156562865, | |
| "grad_norm": 0.3406207859516144, | |
| "learning_rate": 5.188954084772886e-05, | |
| "loss": 0.091, | |
| "step": 11775 | |
| }, | |
| { | |
| "epoch": 0.4831808037999304, | |
| "grad_norm": 0.2970975935459137, | |
| "learning_rate": 5.178695991137007e-05, | |
| "loss": 0.0939, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.4842044919435743, | |
| "grad_norm": 0.2747635245323181, | |
| "learning_rate": 5.168437897501128e-05, | |
| "loss": 0.0927, | |
| "step": 11825 | |
| }, | |
| { | |
| "epoch": 0.48522818008721824, | |
| "grad_norm": 0.2211136370897293, | |
| "learning_rate": 5.1581798038652494e-05, | |
| "loss": 0.096, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 0.48625186823086214, | |
| "grad_norm": 0.2881365418434143, | |
| "learning_rate": 5.147921710229371e-05, | |
| "loss": 0.0933, | |
| "step": 11875 | |
| }, | |
| { | |
| "epoch": 0.4872755563745061, | |
| "grad_norm": 0.2213411182165146, | |
| "learning_rate": 5.1376636165934924e-05, | |
| "loss": 0.0982, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.48829924451815, | |
| "grad_norm": 0.23638983070850372, | |
| "learning_rate": 5.127405522957613e-05, | |
| "loss": 0.1, | |
| "step": 11925 | |
| }, | |
| { | |
| "epoch": 0.4893229326617939, | |
| "grad_norm": 0.2544683814048767, | |
| "learning_rate": 5.1171474293217346e-05, | |
| "loss": 0.1005, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 0.49034662080543784, | |
| "grad_norm": 0.3138396441936493, | |
| "learning_rate": 5.106889335685856e-05, | |
| "loss": 0.0952, | |
| "step": 11975 | |
| }, | |
| { | |
| "epoch": 0.49137030894908174, | |
| "grad_norm": 0.352205365896225, | |
| "learning_rate": 5.0966312420499776e-05, | |
| "loss": 0.0954, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.4923939970927257, | |
| "grad_norm": 0.2083396166563034, | |
| "learning_rate": 5.0863731484140984e-05, | |
| "loss": 0.0947, | |
| "step": 12025 | |
| }, | |
| { | |
| "epoch": 0.4934176852363696, | |
| "grad_norm": 0.2839849591255188, | |
| "learning_rate": 5.07611505477822e-05, | |
| "loss": 0.0986, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 0.49444137338001354, | |
| "grad_norm": 0.26629742980003357, | |
| "learning_rate": 5.065856961142341e-05, | |
| "loss": 0.1007, | |
| "step": 12075 | |
| }, | |
| { | |
| "epoch": 0.49546506152365744, | |
| "grad_norm": 0.2845945656299591, | |
| "learning_rate": 5.055598867506463e-05, | |
| "loss": 0.0995, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.49648874966730133, | |
| "grad_norm": 0.22998517751693726, | |
| "learning_rate": 5.0453407738705835e-05, | |
| "loss": 0.0971, | |
| "step": 12125 | |
| }, | |
| { | |
| "epoch": 0.4975124378109453, | |
| "grad_norm": 0.21335995197296143, | |
| "learning_rate": 5.035082680234705e-05, | |
| "loss": 0.0956, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 0.4985361259545892, | |
| "grad_norm": 0.2018250823020935, | |
| "learning_rate": 5.0248245865988265e-05, | |
| "loss": 0.0982, | |
| "step": 12175 | |
| }, | |
| { | |
| "epoch": 0.49955981409823313, | |
| "grad_norm": 0.2268654853105545, | |
| "learning_rate": 5.014566492962948e-05, | |
| "loss": 0.0951, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.5005835022418771, | |
| "grad_norm": 0.22491568326950073, | |
| "learning_rate": 5.004308399327069e-05, | |
| "loss": 0.0991, | |
| "step": 12225 | |
| }, | |
| { | |
| "epoch": 0.5016071903855209, | |
| "grad_norm": 0.2399354726076126, | |
| "learning_rate": 4.994050305691191e-05, | |
| "loss": 0.0978, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 0.5026308785291649, | |
| "grad_norm": 0.2331288754940033, | |
| "learning_rate": 4.9837922120553123e-05, | |
| "loss": 0.0959, | |
| "step": 12275 | |
| }, | |
| { | |
| "epoch": 0.5036545666728088, | |
| "grad_norm": 0.23224005103111267, | |
| "learning_rate": 4.973534118419433e-05, | |
| "loss": 0.0957, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.5046782548164527, | |
| "grad_norm": 0.28165706992149353, | |
| "learning_rate": 4.9632760247835546e-05, | |
| "loss": 0.0985, | |
| "step": 12325 | |
| }, | |
| { | |
| "epoch": 0.5057019429600966, | |
| "grad_norm": 0.22725163400173187, | |
| "learning_rate": 4.953017931147676e-05, | |
| "loss": 0.0934, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 0.5067256311037406, | |
| "grad_norm": 0.30300387740135193, | |
| "learning_rate": 4.9427598375117975e-05, | |
| "loss": 0.0947, | |
| "step": 12375 | |
| }, | |
| { | |
| "epoch": 0.5077493192473844, | |
| "grad_norm": 0.22563788294792175, | |
| "learning_rate": 4.932501743875918e-05, | |
| "loss": 0.0964, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.5087730073910284, | |
| "grad_norm": 0.2117646187543869, | |
| "learning_rate": 4.92224365024004e-05, | |
| "loss": 0.0952, | |
| "step": 12425 | |
| }, | |
| { | |
| "epoch": 0.5097966955346723, | |
| "grad_norm": 0.22831501066684723, | |
| "learning_rate": 4.911985556604161e-05, | |
| "loss": 0.0965, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 0.5108203836783163, | |
| "grad_norm": 0.2967502474784851, | |
| "learning_rate": 4.901727462968283e-05, | |
| "loss": 0.0947, | |
| "step": 12475 | |
| }, | |
| { | |
| "epoch": 0.5118440718219601, | |
| "grad_norm": 0.22398816049098969, | |
| "learning_rate": 4.8914693693324035e-05, | |
| "loss": 0.0935, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.5128677599656041, | |
| "grad_norm": 0.26190030574798584, | |
| "learning_rate": 4.881211275696525e-05, | |
| "loss": 0.0965, | |
| "step": 12525 | |
| }, | |
| { | |
| "epoch": 0.513891448109248, | |
| "grad_norm": 0.2718106508255005, | |
| "learning_rate": 4.8709531820606464e-05, | |
| "loss": 0.0955, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 0.5149151362528919, | |
| "grad_norm": 0.27051568031311035, | |
| "learning_rate": 4.860695088424768e-05, | |
| "loss": 0.094, | |
| "step": 12575 | |
| }, | |
| { | |
| "epoch": 0.5159388243965358, | |
| "grad_norm": 0.2031087726354599, | |
| "learning_rate": 4.850436994788889e-05, | |
| "loss": 0.0988, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.5169625125401798, | |
| "grad_norm": 0.23844382166862488, | |
| "learning_rate": 4.84017890115301e-05, | |
| "loss": 0.0915, | |
| "step": 12625 | |
| }, | |
| { | |
| "epoch": 0.5179862006838237, | |
| "grad_norm": 0.31874755024909973, | |
| "learning_rate": 4.8299208075171316e-05, | |
| "loss": 0.0908, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 0.5190098888274676, | |
| "grad_norm": 0.23138810694217682, | |
| "learning_rate": 4.819662713881253e-05, | |
| "loss": 0.0962, | |
| "step": 12675 | |
| }, | |
| { | |
| "epoch": 0.5200335769711115, | |
| "grad_norm": 0.27298617362976074, | |
| "learning_rate": 4.809404620245374e-05, | |
| "loss": 0.0975, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.5210572651147555, | |
| "grad_norm": 0.25157856941223145, | |
| "learning_rate": 4.7991465266094954e-05, | |
| "loss": 0.0967, | |
| "step": 12725 | |
| }, | |
| { | |
| "epoch": 0.5220809532583993, | |
| "grad_norm": 0.20571890473365784, | |
| "learning_rate": 4.788888432973617e-05, | |
| "loss": 0.0939, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 0.5231046414020433, | |
| "grad_norm": 0.24462303519248962, | |
| "learning_rate": 4.778630339337738e-05, | |
| "loss": 0.0944, | |
| "step": 12775 | |
| }, | |
| { | |
| "epoch": 0.5241283295456872, | |
| "grad_norm": 0.2392750382423401, | |
| "learning_rate": 4.768372245701859e-05, | |
| "loss": 0.0959, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.5251520176893312, | |
| "grad_norm": 0.2759506106376648, | |
| "learning_rate": 4.7581141520659805e-05, | |
| "loss": 0.0957, | |
| "step": 12825 | |
| }, | |
| { | |
| "epoch": 0.526175705832975, | |
| "grad_norm": 0.24135975539684296, | |
| "learning_rate": 4.747856058430102e-05, | |
| "loss": 0.0917, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 0.527199393976619, | |
| "grad_norm": 0.2595539689064026, | |
| "learning_rate": 4.737597964794223e-05, | |
| "loss": 0.0984, | |
| "step": 12875 | |
| }, | |
| { | |
| "epoch": 0.5282230821202629, | |
| "grad_norm": 0.2650289535522461, | |
| "learning_rate": 4.727339871158344e-05, | |
| "loss": 0.0938, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.5292467702639068, | |
| "grad_norm": 0.24425174295902252, | |
| "learning_rate": 4.717081777522466e-05, | |
| "loss": 0.0991, | |
| "step": 12925 | |
| }, | |
| { | |
| "epoch": 0.5302704584075507, | |
| "grad_norm": 0.24873086810112, | |
| "learning_rate": 4.706823683886587e-05, | |
| "loss": 0.0914, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 0.5312941465511947, | |
| "grad_norm": 0.280268132686615, | |
| "learning_rate": 4.696565590250708e-05, | |
| "loss": 0.0966, | |
| "step": 12975 | |
| }, | |
| { | |
| "epoch": 0.5323178346948385, | |
| "grad_norm": 0.24281346797943115, | |
| "learning_rate": 4.6863074966148295e-05, | |
| "loss": 0.0932, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.5333415228384825, | |
| "grad_norm": 0.24113261699676514, | |
| "learning_rate": 4.676049402978951e-05, | |
| "loss": 0.0944, | |
| "step": 13025 | |
| }, | |
| { | |
| "epoch": 0.5343652109821264, | |
| "grad_norm": 0.2524602711200714, | |
| "learning_rate": 4.6657913093430724e-05, | |
| "loss": 0.0922, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 0.5353888991257704, | |
| "grad_norm": 0.24346871674060822, | |
| "learning_rate": 4.655533215707193e-05, | |
| "loss": 0.0932, | |
| "step": 13075 | |
| }, | |
| { | |
| "epoch": 0.5364125872694142, | |
| "grad_norm": 0.29335957765579224, | |
| "learning_rate": 4.6452751220713147e-05, | |
| "loss": 0.0942, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.5374362754130582, | |
| "grad_norm": 0.31220850348472595, | |
| "learning_rate": 4.6350170284354354e-05, | |
| "loss": 0.0931, | |
| "step": 13125 | |
| }, | |
| { | |
| "epoch": 0.5384599635567021, | |
| "grad_norm": 0.2569523751735687, | |
| "learning_rate": 4.624758934799557e-05, | |
| "loss": 0.1005, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 0.539483651700346, | |
| "grad_norm": 0.26125669479370117, | |
| "learning_rate": 4.6145008411636784e-05, | |
| "loss": 0.0924, | |
| "step": 13175 | |
| }, | |
| { | |
| "epoch": 0.5405073398439899, | |
| "grad_norm": 0.31377628445625305, | |
| "learning_rate": 4.604242747527799e-05, | |
| "loss": 0.0966, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.5415310279876339, | |
| "grad_norm": 0.25545644760131836, | |
| "learning_rate": 4.5939846538919206e-05, | |
| "loss": 0.0997, | |
| "step": 13225 | |
| }, | |
| { | |
| "epoch": 0.5425547161312778, | |
| "grad_norm": 0.2510424554347992, | |
| "learning_rate": 4.583726560256042e-05, | |
| "loss": 0.0939, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 0.5435784042749217, | |
| "grad_norm": 0.2709631323814392, | |
| "learning_rate": 4.5734684666201636e-05, | |
| "loss": 0.0918, | |
| "step": 13275 | |
| }, | |
| { | |
| "epoch": 0.5446020924185656, | |
| "grad_norm": 0.2531428337097168, | |
| "learning_rate": 4.5632103729842844e-05, | |
| "loss": 0.0968, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.5456257805622096, | |
| "grad_norm": 0.3153735101222992, | |
| "learning_rate": 4.552952279348406e-05, | |
| "loss": 0.0957, | |
| "step": 13325 | |
| }, | |
| { | |
| "epoch": 0.5466494687058534, | |
| "grad_norm": 0.2258891612291336, | |
| "learning_rate": 4.542694185712527e-05, | |
| "loss": 0.0947, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 0.5476731568494974, | |
| "grad_norm": 0.2671023905277252, | |
| "learning_rate": 4.532436092076649e-05, | |
| "loss": 0.0957, | |
| "step": 13375 | |
| }, | |
| { | |
| "epoch": 0.5486968449931413, | |
| "grad_norm": 0.333008348941803, | |
| "learning_rate": 4.5221779984407695e-05, | |
| "loss": 0.0953, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.5497205331367853, | |
| "grad_norm": 0.2922687828540802, | |
| "learning_rate": 4.511919904804891e-05, | |
| "loss": 0.0963, | |
| "step": 13425 | |
| }, | |
| { | |
| "epoch": 0.5507442212804291, | |
| "grad_norm": 0.27738088369369507, | |
| "learning_rate": 4.5016618111690125e-05, | |
| "loss": 0.0926, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 0.551767909424073, | |
| "grad_norm": 0.28781887888908386, | |
| "learning_rate": 4.491403717533134e-05, | |
| "loss": 0.0986, | |
| "step": 13475 | |
| }, | |
| { | |
| "epoch": 0.552791597567717, | |
| "grad_norm": 0.2727603018283844, | |
| "learning_rate": 4.481145623897255e-05, | |
| "loss": 0.0995, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.5538152857113608, | |
| "grad_norm": 0.2735615670681, | |
| "learning_rate": 4.470887530261376e-05, | |
| "loss": 0.0959, | |
| "step": 13525 | |
| }, | |
| { | |
| "epoch": 0.5548389738550048, | |
| "grad_norm": 0.2211311310529709, | |
| "learning_rate": 4.460629436625498e-05, | |
| "loss": 0.0872, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 0.5558626619986488, | |
| "grad_norm": 0.2359626144170761, | |
| "learning_rate": 4.4503713429896185e-05, | |
| "loss": 0.0967, | |
| "step": 13575 | |
| }, | |
| { | |
| "epoch": 0.5568863501422926, | |
| "grad_norm": 0.27807098627090454, | |
| "learning_rate": 4.44011324935374e-05, | |
| "loss": 0.0953, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.5579100382859365, | |
| "grad_norm": 0.2691691219806671, | |
| "learning_rate": 4.4298551557178614e-05, | |
| "loss": 0.0942, | |
| "step": 13625 | |
| }, | |
| { | |
| "epoch": 0.5589337264295805, | |
| "grad_norm": 0.22528734803199768, | |
| "learning_rate": 4.419597062081983e-05, | |
| "loss": 0.0975, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 0.5599574145732245, | |
| "grad_norm": 0.22979159653186798, | |
| "learning_rate": 4.4093389684461036e-05, | |
| "loss": 0.0946, | |
| "step": 13675 | |
| }, | |
| { | |
| "epoch": 0.5609811027168683, | |
| "grad_norm": 0.35849061608314514, | |
| "learning_rate": 4.399080874810225e-05, | |
| "loss": 0.0886, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.5620047908605122, | |
| "grad_norm": 0.2247435599565506, | |
| "learning_rate": 4.3888227811743466e-05, | |
| "loss": 0.0942, | |
| "step": 13725 | |
| }, | |
| { | |
| "epoch": 0.5630284790041562, | |
| "grad_norm": 0.2186431735754013, | |
| "learning_rate": 4.378564687538468e-05, | |
| "loss": 0.0958, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 0.5640521671478, | |
| "grad_norm": 0.26496851444244385, | |
| "learning_rate": 4.368306593902589e-05, | |
| "loss": 0.0932, | |
| "step": 13775 | |
| }, | |
| { | |
| "epoch": 0.565075855291444, | |
| "grad_norm": 0.20004922151565552, | |
| "learning_rate": 4.35804850026671e-05, | |
| "loss": 0.1003, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.566099543435088, | |
| "grad_norm": 0.25645968317985535, | |
| "learning_rate": 4.347790406630832e-05, | |
| "loss": 0.0932, | |
| "step": 13825 | |
| }, | |
| { | |
| "epoch": 0.5671232315787319, | |
| "grad_norm": 0.24646583199501038, | |
| "learning_rate": 4.337532312994953e-05, | |
| "loss": 0.0954, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 0.5681469197223757, | |
| "grad_norm": 0.25467848777770996, | |
| "learning_rate": 4.327274219359074e-05, | |
| "loss": 0.0982, | |
| "step": 13875 | |
| }, | |
| { | |
| "epoch": 0.5691706078660197, | |
| "grad_norm": 0.24455401301383972, | |
| "learning_rate": 4.3170161257231955e-05, | |
| "loss": 0.0913, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.5701942960096636, | |
| "grad_norm": 0.26495805382728577, | |
| "learning_rate": 4.306758032087317e-05, | |
| "loss": 0.0894, | |
| "step": 13925 | |
| }, | |
| { | |
| "epoch": 0.5712179841533075, | |
| "grad_norm": 0.23517432808876038, | |
| "learning_rate": 4.2964999384514384e-05, | |
| "loss": 0.0941, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 0.5722416722969514, | |
| "grad_norm": 0.25355222821235657, | |
| "learning_rate": 4.286241844815559e-05, | |
| "loss": 0.09, | |
| "step": 13975 | |
| }, | |
| { | |
| "epoch": 0.5732653604405954, | |
| "grad_norm": 0.2494240403175354, | |
| "learning_rate": 4.275983751179681e-05, | |
| "loss": 0.1003, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.5742890485842393, | |
| "grad_norm": 0.22723737359046936, | |
| "learning_rate": 4.265725657543802e-05, | |
| "loss": 0.1001, | |
| "step": 14025 | |
| }, | |
| { | |
| "epoch": 0.5753127367278832, | |
| "grad_norm": 0.19633585214614868, | |
| "learning_rate": 4.2554675639079236e-05, | |
| "loss": 0.0934, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 0.5763364248715271, | |
| "grad_norm": 0.24108199775218964, | |
| "learning_rate": 4.2452094702720444e-05, | |
| "loss": 0.0932, | |
| "step": 14075 | |
| }, | |
| { | |
| "epoch": 0.5773601130151711, | |
| "grad_norm": 0.28201839327812195, | |
| "learning_rate": 4.234951376636166e-05, | |
| "loss": 0.0931, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.5783838011588149, | |
| "grad_norm": 0.29982468485832214, | |
| "learning_rate": 4.224693283000287e-05, | |
| "loss": 0.0941, | |
| "step": 14125 | |
| }, | |
| { | |
| "epoch": 0.5794074893024589, | |
| "grad_norm": 0.2526194453239441, | |
| "learning_rate": 4.214435189364409e-05, | |
| "loss": 0.0937, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 0.5804311774461028, | |
| "grad_norm": 0.2288905531167984, | |
| "learning_rate": 4.2041770957285296e-05, | |
| "loss": 0.0949, | |
| "step": 14175 | |
| }, | |
| { | |
| "epoch": 0.5814548655897468, | |
| "grad_norm": 0.19117498397827148, | |
| "learning_rate": 4.193919002092651e-05, | |
| "loss": 0.0938, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.5824785537333906, | |
| "grad_norm": 0.2719483971595764, | |
| "learning_rate": 4.1836609084567725e-05, | |
| "loss": 0.0911, | |
| "step": 14225 | |
| }, | |
| { | |
| "epoch": 0.5835022418770346, | |
| "grad_norm": 0.2975625991821289, | |
| "learning_rate": 4.173402814820894e-05, | |
| "loss": 0.0991, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 0.5845259300206785, | |
| "grad_norm": 0.2232026904821396, | |
| "learning_rate": 4.163144721185015e-05, | |
| "loss": 0.0971, | |
| "step": 14275 | |
| }, | |
| { | |
| "epoch": 0.5855496181643224, | |
| "grad_norm": 0.26348811388015747, | |
| "learning_rate": 4.152886627549136e-05, | |
| "loss": 0.0946, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.5865733063079663, | |
| "grad_norm": 0.320698082447052, | |
| "learning_rate": 4.142628533913258e-05, | |
| "loss": 0.0909, | |
| "step": 14325 | |
| }, | |
| { | |
| "epoch": 0.5875969944516103, | |
| "grad_norm": 0.27873241901397705, | |
| "learning_rate": 4.132370440277379e-05, | |
| "loss": 0.0986, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 0.5886206825952541, | |
| "grad_norm": 0.22352805733680725, | |
| "learning_rate": 4.1221123466415e-05, | |
| "loss": 0.0959, | |
| "step": 14375 | |
| }, | |
| { | |
| "epoch": 0.5896443707388981, | |
| "grad_norm": 0.2206275910139084, | |
| "learning_rate": 4.1118542530056214e-05, | |
| "loss": 0.0973, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.590668058882542, | |
| "grad_norm": 0.20755049586296082, | |
| "learning_rate": 4.101596159369743e-05, | |
| "loss": 0.0987, | |
| "step": 14425 | |
| }, | |
| { | |
| "epoch": 0.591691747026186, | |
| "grad_norm": 0.25802165269851685, | |
| "learning_rate": 4.091338065733864e-05, | |
| "loss": 0.0898, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 0.5927154351698298, | |
| "grad_norm": 0.21148554980754852, | |
| "learning_rate": 4.081079972097985e-05, | |
| "loss": 0.0906, | |
| "step": 14475 | |
| }, | |
| { | |
| "epoch": 0.5937391233134738, | |
| "grad_norm": 0.28330081701278687, | |
| "learning_rate": 4.0708218784621066e-05, | |
| "loss": 0.0952, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.5947628114571177, | |
| "grad_norm": 0.26006045937538147, | |
| "learning_rate": 4.060563784826228e-05, | |
| "loss": 0.0879, | |
| "step": 14525 | |
| }, | |
| { | |
| "epoch": 0.5957864996007616, | |
| "grad_norm": 0.2529297173023224, | |
| "learning_rate": 4.050305691190349e-05, | |
| "loss": 0.0899, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 0.5968101877444055, | |
| "grad_norm": 0.25934335589408875, | |
| "learning_rate": 4.0400475975544703e-05, | |
| "loss": 0.0974, | |
| "step": 14575 | |
| }, | |
| { | |
| "epoch": 0.5978338758880495, | |
| "grad_norm": 0.34801098704338074, | |
| "learning_rate": 4.029789503918592e-05, | |
| "loss": 0.0966, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.5988575640316934, | |
| "grad_norm": 0.3519505262374878, | |
| "learning_rate": 4.019531410282713e-05, | |
| "loss": 0.0976, | |
| "step": 14625 | |
| }, | |
| { | |
| "epoch": 0.5998812521753373, | |
| "grad_norm": 0.21879540383815765, | |
| "learning_rate": 4.009273316646834e-05, | |
| "loss": 0.0929, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 0.6009049403189812, | |
| "grad_norm": 0.3088552951812744, | |
| "learning_rate": 3.9990152230109555e-05, | |
| "loss": 0.0895, | |
| "step": 14675 | |
| }, | |
| { | |
| "epoch": 0.6019286284626252, | |
| "grad_norm": 0.1896054446697235, | |
| "learning_rate": 3.988757129375077e-05, | |
| "loss": 0.0954, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.602952316606269, | |
| "grad_norm": 0.24023133516311646, | |
| "learning_rate": 3.9784990357391985e-05, | |
| "loss": 0.0928, | |
| "step": 14725 | |
| }, | |
| { | |
| "epoch": 0.603976004749913, | |
| "grad_norm": 0.2335812747478485, | |
| "learning_rate": 3.968240942103319e-05, | |
| "loss": 0.0915, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 0.6049996928935569, | |
| "grad_norm": 0.2618425488471985, | |
| "learning_rate": 3.957982848467441e-05, | |
| "loss": 0.093, | |
| "step": 14775 | |
| }, | |
| { | |
| "epoch": 0.6060233810372009, | |
| "grad_norm": 0.28540539741516113, | |
| "learning_rate": 3.947724754831562e-05, | |
| "loss": 0.0945, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.6070470691808447, | |
| "grad_norm": 0.34257885813713074, | |
| "learning_rate": 3.9374666611956837e-05, | |
| "loss": 0.0937, | |
| "step": 14825 | |
| }, | |
| { | |
| "epoch": 0.6080707573244887, | |
| "grad_norm": 0.242543026804924, | |
| "learning_rate": 3.9272085675598045e-05, | |
| "loss": 0.1006, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 0.6090944454681326, | |
| "grad_norm": 0.3368709683418274, | |
| "learning_rate": 3.916950473923926e-05, | |
| "loss": 0.098, | |
| "step": 14875 | |
| }, | |
| { | |
| "epoch": 0.6101181336117765, | |
| "grad_norm": 0.2498284876346588, | |
| "learning_rate": 3.9066923802880474e-05, | |
| "loss": 0.0945, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.6111418217554204, | |
| "grad_norm": 0.36862441897392273, | |
| "learning_rate": 3.896434286652169e-05, | |
| "loss": 0.0947, | |
| "step": 14925 | |
| }, | |
| { | |
| "epoch": 0.6121655098990644, | |
| "grad_norm": 0.2516944110393524, | |
| "learning_rate": 3.8861761930162896e-05, | |
| "loss": 0.0904, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 0.6131891980427082, | |
| "grad_norm": 0.23044705390930176, | |
| "learning_rate": 3.875918099380411e-05, | |
| "loss": 0.0922, | |
| "step": 14975 | |
| }, | |
| { | |
| "epoch": 0.6142128861863522, | |
| "grad_norm": 0.322510689496994, | |
| "learning_rate": 3.8656600057445326e-05, | |
| "loss": 0.0939, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.6152365743299961, | |
| "grad_norm": 0.28902101516723633, | |
| "learning_rate": 3.855401912108654e-05, | |
| "loss": 0.092, | |
| "step": 15025 | |
| }, | |
| { | |
| "epoch": 0.6162602624736401, | |
| "grad_norm": 0.33545222878456116, | |
| "learning_rate": 3.845143818472775e-05, | |
| "loss": 0.0931, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 0.6172839506172839, | |
| "grad_norm": 0.24440859258174896, | |
| "learning_rate": 3.834885724836896e-05, | |
| "loss": 0.0957, | |
| "step": 15075 | |
| }, | |
| { | |
| "epoch": 0.6183076387609279, | |
| "grad_norm": 0.25635841488838196, | |
| "learning_rate": 3.824627631201018e-05, | |
| "loss": 0.0945, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.6193313269045718, | |
| "grad_norm": 0.26487112045288086, | |
| "learning_rate": 3.814369537565139e-05, | |
| "loss": 0.094, | |
| "step": 15125 | |
| }, | |
| { | |
| "epoch": 0.6203550150482157, | |
| "grad_norm": 0.2371329963207245, | |
| "learning_rate": 3.80411144392926e-05, | |
| "loss": 0.0939, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 0.6213787031918596, | |
| "grad_norm": 0.23745235800743103, | |
| "learning_rate": 3.7938533502933815e-05, | |
| "loss": 0.0894, | |
| "step": 15175 | |
| }, | |
| { | |
| "epoch": 0.6224023913355036, | |
| "grad_norm": 0.32679396867752075, | |
| "learning_rate": 3.783595256657503e-05, | |
| "loss": 0.0916, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.6234260794791475, | |
| "grad_norm": 0.236038938164711, | |
| "learning_rate": 3.7733371630216244e-05, | |
| "loss": 0.0896, | |
| "step": 15225 | |
| }, | |
| { | |
| "epoch": 0.6244497676227914, | |
| "grad_norm": 0.17879773676395416, | |
| "learning_rate": 3.763079069385745e-05, | |
| "loss": 0.0971, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 0.6254734557664353, | |
| "grad_norm": 0.30429938435554504, | |
| "learning_rate": 3.752820975749867e-05, | |
| "loss": 0.0884, | |
| "step": 15275 | |
| }, | |
| { | |
| "epoch": 0.6264971439100793, | |
| "grad_norm": 0.332989364862442, | |
| "learning_rate": 3.742562882113988e-05, | |
| "loss": 0.0998, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.6275208320537231, | |
| "grad_norm": 0.2244502305984497, | |
| "learning_rate": 3.7323047884781096e-05, | |
| "loss": 0.0894, | |
| "step": 15325 | |
| }, | |
| { | |
| "epoch": 0.6285445201973671, | |
| "grad_norm": 0.22671306133270264, | |
| "learning_rate": 3.7220466948422304e-05, | |
| "loss": 0.0957, | |
| "step": 15350 | |
| }, | |
| { | |
| "epoch": 0.629568208341011, | |
| "grad_norm": 0.22526578605175018, | |
| "learning_rate": 3.711788601206352e-05, | |
| "loss": 0.0892, | |
| "step": 15375 | |
| }, | |
| { | |
| "epoch": 0.630591896484655, | |
| "grad_norm": 0.2514040768146515, | |
| "learning_rate": 3.701530507570473e-05, | |
| "loss": 0.0916, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.6316155846282988, | |
| "grad_norm": 0.22782598435878754, | |
| "learning_rate": 3.691272413934594e-05, | |
| "loss": 0.0977, | |
| "step": 15425 | |
| }, | |
| { | |
| "epoch": 0.6326392727719428, | |
| "grad_norm": 0.24572695791721344, | |
| "learning_rate": 3.6810143202987156e-05, | |
| "loss": 0.0919, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 0.6336629609155867, | |
| "grad_norm": 0.231769859790802, | |
| "learning_rate": 3.670756226662837e-05, | |
| "loss": 0.0904, | |
| "step": 15475 | |
| }, | |
| { | |
| "epoch": 0.6346866490592306, | |
| "grad_norm": 0.28821659088134766, | |
| "learning_rate": 3.6604981330269585e-05, | |
| "loss": 0.0905, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.6357103372028745, | |
| "grad_norm": 0.19901390373706818, | |
| "learning_rate": 3.650240039391079e-05, | |
| "loss": 0.0885, | |
| "step": 15525 | |
| }, | |
| { | |
| "epoch": 0.6367340253465185, | |
| "grad_norm": 0.24236318469047546, | |
| "learning_rate": 3.639981945755201e-05, | |
| "loss": 0.0929, | |
| "step": 15550 | |
| }, | |
| { | |
| "epoch": 0.6377577134901623, | |
| "grad_norm": 0.27218177914619446, | |
| "learning_rate": 3.629723852119322e-05, | |
| "loss": 0.0925, | |
| "step": 15575 | |
| }, | |
| { | |
| "epoch": 0.6387814016338063, | |
| "grad_norm": 0.29827386140823364, | |
| "learning_rate": 3.619465758483444e-05, | |
| "loss": 0.0913, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.6398050897774502, | |
| "grad_norm": 0.2742908000946045, | |
| "learning_rate": 3.6092076648475645e-05, | |
| "loss": 0.0941, | |
| "step": 15625 | |
| }, | |
| { | |
| "epoch": 0.6408287779210942, | |
| "grad_norm": 0.28651776909828186, | |
| "learning_rate": 3.598949571211686e-05, | |
| "loss": 0.099, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 0.641852466064738, | |
| "grad_norm": 0.2705094814300537, | |
| "learning_rate": 3.5886914775758074e-05, | |
| "loss": 0.0972, | |
| "step": 15675 | |
| }, | |
| { | |
| "epoch": 0.642876154208382, | |
| "grad_norm": 0.2905079424381256, | |
| "learning_rate": 3.578433383939929e-05, | |
| "loss": 0.0952, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.6438998423520259, | |
| "grad_norm": 0.28126639127731323, | |
| "learning_rate": 3.56817529030405e-05, | |
| "loss": 0.0928, | |
| "step": 15725 | |
| }, | |
| { | |
| "epoch": 0.6449235304956698, | |
| "grad_norm": 0.23970367014408112, | |
| "learning_rate": 3.557917196668171e-05, | |
| "loss": 0.0945, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 0.6459472186393137, | |
| "grad_norm": 0.23676908016204834, | |
| "learning_rate": 3.5476591030322926e-05, | |
| "loss": 0.0936, | |
| "step": 15775 | |
| }, | |
| { | |
| "epoch": 0.6469709067829577, | |
| "grad_norm": 0.2415960431098938, | |
| "learning_rate": 3.537401009396414e-05, | |
| "loss": 0.0923, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.6479945949266016, | |
| "grad_norm": 0.25593453645706177, | |
| "learning_rate": 3.527142915760535e-05, | |
| "loss": 0.0924, | |
| "step": 15825 | |
| }, | |
| { | |
| "epoch": 0.6490182830702454, | |
| "grad_norm": 0.24800516664981842, | |
| "learning_rate": 3.5168848221246563e-05, | |
| "loss": 0.0945, | |
| "step": 15850 | |
| }, | |
| { | |
| "epoch": 0.6500419712138894, | |
| "grad_norm": 0.24197053909301758, | |
| "learning_rate": 3.506626728488778e-05, | |
| "loss": 0.0932, | |
| "step": 15875 | |
| }, | |
| { | |
| "epoch": 0.6510656593575334, | |
| "grad_norm": 0.25776922702789307, | |
| "learning_rate": 3.496368634852899e-05, | |
| "loss": 0.0909, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.6520893475011772, | |
| "grad_norm": 0.1953815519809723, | |
| "learning_rate": 3.48611054121702e-05, | |
| "loss": 0.0927, | |
| "step": 15925 | |
| }, | |
| { | |
| "epoch": 0.6531130356448211, | |
| "grad_norm": 0.267980694770813, | |
| "learning_rate": 3.4758524475811415e-05, | |
| "loss": 0.0917, | |
| "step": 15950 | |
| }, | |
| { | |
| "epoch": 0.6541367237884651, | |
| "grad_norm": 0.2663339674472809, | |
| "learning_rate": 3.465594353945263e-05, | |
| "loss": 0.0921, | |
| "step": 15975 | |
| }, | |
| { | |
| "epoch": 0.655160411932109, | |
| "grad_norm": 0.32129451632499695, | |
| "learning_rate": 3.4553362603093845e-05, | |
| "loss": 0.0906, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.6561841000757529, | |
| "grad_norm": 0.23216140270233154, | |
| "learning_rate": 3.445078166673505e-05, | |
| "loss": 0.0916, | |
| "step": 16025 | |
| }, | |
| { | |
| "epoch": 0.6572077882193968, | |
| "grad_norm": 0.26740553975105286, | |
| "learning_rate": 3.434820073037627e-05, | |
| "loss": 0.0939, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 0.6582314763630408, | |
| "grad_norm": 0.26812317967414856, | |
| "learning_rate": 3.424561979401748e-05, | |
| "loss": 0.0903, | |
| "step": 16075 | |
| }, | |
| { | |
| "epoch": 0.6592551645066846, | |
| "grad_norm": 0.2955368459224701, | |
| "learning_rate": 3.4143038857658697e-05, | |
| "loss": 0.0907, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.6602788526503286, | |
| "grad_norm": 0.23007504642009735, | |
| "learning_rate": 3.4040457921299904e-05, | |
| "loss": 0.0894, | |
| "step": 16125 | |
| }, | |
| { | |
| "epoch": 0.6613025407939725, | |
| "grad_norm": 0.2416328340768814, | |
| "learning_rate": 3.393787698494112e-05, | |
| "loss": 0.0872, | |
| "step": 16150 | |
| }, | |
| { | |
| "epoch": 0.6623262289376164, | |
| "grad_norm": 0.2104121297597885, | |
| "learning_rate": 3.3835296048582334e-05, | |
| "loss": 0.0877, | |
| "step": 16175 | |
| }, | |
| { | |
| "epoch": 0.6633499170812603, | |
| "grad_norm": 0.23629434406757355, | |
| "learning_rate": 3.373271511222355e-05, | |
| "loss": 0.0943, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.6643736052249043, | |
| "grad_norm": 0.2717180550098419, | |
| "learning_rate": 3.3630134175864756e-05, | |
| "loss": 0.0879, | |
| "step": 16225 | |
| }, | |
| { | |
| "epoch": 0.6653972933685482, | |
| "grad_norm": 0.27863848209381104, | |
| "learning_rate": 3.352755323950597e-05, | |
| "loss": 0.0881, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 0.6664209815121921, | |
| "grad_norm": 0.2909884452819824, | |
| "learning_rate": 3.3424972303147186e-05, | |
| "loss": 0.0938, | |
| "step": 16275 | |
| }, | |
| { | |
| "epoch": 0.667444669655836, | |
| "grad_norm": 0.18690423667430878, | |
| "learning_rate": 3.3322391366788394e-05, | |
| "loss": 0.0945, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.66846835779948, | |
| "grad_norm": 0.2364642322063446, | |
| "learning_rate": 3.321981043042961e-05, | |
| "loss": 0.0981, | |
| "step": 16325 | |
| }, | |
| { | |
| "epoch": 0.6694920459431238, | |
| "grad_norm": 0.23339948058128357, | |
| "learning_rate": 3.311722949407082e-05, | |
| "loss": 0.0959, | |
| "step": 16350 | |
| }, | |
| { | |
| "epoch": 0.6705157340867678, | |
| "grad_norm": 0.3215301036834717, | |
| "learning_rate": 3.301464855771204e-05, | |
| "loss": 0.0915, | |
| "step": 16375 | |
| }, | |
| { | |
| "epoch": 0.6715394222304117, | |
| "grad_norm": 0.21121945977210999, | |
| "learning_rate": 3.2912067621353245e-05, | |
| "loss": 0.0965, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.6725631103740557, | |
| "grad_norm": 0.2474169135093689, | |
| "learning_rate": 3.280948668499446e-05, | |
| "loss": 0.0901, | |
| "step": 16425 | |
| }, | |
| { | |
| "epoch": 0.6735867985176995, | |
| "grad_norm": 0.27990350127220154, | |
| "learning_rate": 3.2706905748635675e-05, | |
| "loss": 0.0914, | |
| "step": 16450 | |
| }, | |
| { | |
| "epoch": 0.6746104866613435, | |
| "grad_norm": 0.23860132694244385, | |
| "learning_rate": 3.260432481227689e-05, | |
| "loss": 0.0892, | |
| "step": 16475 | |
| }, | |
| { | |
| "epoch": 0.6756341748049874, | |
| "grad_norm": 0.29351699352264404, | |
| "learning_rate": 3.25017438759181e-05, | |
| "loss": 0.0956, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.6766578629486313, | |
| "grad_norm": 0.2769309878349304, | |
| "learning_rate": 3.239916293955931e-05, | |
| "loss": 0.0938, | |
| "step": 16525 | |
| }, | |
| { | |
| "epoch": 0.6776815510922752, | |
| "grad_norm": 0.1899634450674057, | |
| "learning_rate": 3.229658200320053e-05, | |
| "loss": 0.0927, | |
| "step": 16550 | |
| }, | |
| { | |
| "epoch": 0.6787052392359192, | |
| "grad_norm": 0.23339390754699707, | |
| "learning_rate": 3.219400106684174e-05, | |
| "loss": 0.0944, | |
| "step": 16575 | |
| }, | |
| { | |
| "epoch": 0.6797289273795631, | |
| "grad_norm": 0.30219605565071106, | |
| "learning_rate": 3.209142013048295e-05, | |
| "loss": 0.0908, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.680752615523207, | |
| "grad_norm": 0.24272675812244415, | |
| "learning_rate": 3.1988839194124164e-05, | |
| "loss": 0.0905, | |
| "step": 16625 | |
| }, | |
| { | |
| "epoch": 0.6817763036668509, | |
| "grad_norm": 0.28862476348876953, | |
| "learning_rate": 3.188625825776538e-05, | |
| "loss": 0.0958, | |
| "step": 16650 | |
| }, | |
| { | |
| "epoch": 0.6827999918104949, | |
| "grad_norm": 0.230793759226799, | |
| "learning_rate": 3.178367732140659e-05, | |
| "loss": 0.0942, | |
| "step": 16675 | |
| }, | |
| { | |
| "epoch": 0.6838236799541387, | |
| "grad_norm": 0.256304532289505, | |
| "learning_rate": 3.16810963850478e-05, | |
| "loss": 0.0908, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.6848473680977827, | |
| "grad_norm": 0.24292372167110443, | |
| "learning_rate": 3.1578515448689016e-05, | |
| "loss": 0.0919, | |
| "step": 16725 | |
| }, | |
| { | |
| "epoch": 0.6858710562414266, | |
| "grad_norm": 0.3442842662334442, | |
| "learning_rate": 3.147593451233023e-05, | |
| "loss": 0.0906, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 0.6868947443850705, | |
| "grad_norm": 0.28444263339042664, | |
| "learning_rate": 3.1373353575971445e-05, | |
| "loss": 0.0904, | |
| "step": 16775 | |
| }, | |
| { | |
| "epoch": 0.6879184325287144, | |
| "grad_norm": 0.2305566966533661, | |
| "learning_rate": 3.127077263961265e-05, | |
| "loss": 0.0948, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.6889421206723584, | |
| "grad_norm": 0.3065620958805084, | |
| "learning_rate": 3.116819170325387e-05, | |
| "loss": 0.0916, | |
| "step": 16825 | |
| }, | |
| { | |
| "epoch": 0.6899658088160023, | |
| "grad_norm": 0.34748420119285583, | |
| "learning_rate": 3.106561076689508e-05, | |
| "loss": 0.0898, | |
| "step": 16850 | |
| }, | |
| { | |
| "epoch": 0.6909894969596462, | |
| "grad_norm": 0.28425559401512146, | |
| "learning_rate": 3.09630298305363e-05, | |
| "loss": 0.0941, | |
| "step": 16875 | |
| }, | |
| { | |
| "epoch": 0.6920131851032901, | |
| "grad_norm": 0.31354910135269165, | |
| "learning_rate": 3.0860448894177505e-05, | |
| "loss": 0.0945, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.6930368732469341, | |
| "grad_norm": 0.2128172069787979, | |
| "learning_rate": 3.075786795781872e-05, | |
| "loss": 0.0909, | |
| "step": 16925 | |
| }, | |
| { | |
| "epoch": 0.6940605613905779, | |
| "grad_norm": 0.2469140887260437, | |
| "learning_rate": 3.0655287021459934e-05, | |
| "loss": 0.0939, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 0.6950842495342219, | |
| "grad_norm": 0.35298585891723633, | |
| "learning_rate": 3.055270608510115e-05, | |
| "loss": 0.089, | |
| "step": 16975 | |
| }, | |
| { | |
| "epoch": 0.6961079376778658, | |
| "grad_norm": 0.26399216055870056, | |
| "learning_rate": 3.045012514874236e-05, | |
| "loss": 0.0968, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.6971316258215098, | |
| "grad_norm": 0.2543809413909912, | |
| "learning_rate": 3.034754421238357e-05, | |
| "loss": 0.0908, | |
| "step": 17025 | |
| }, | |
| { | |
| "epoch": 0.6981553139651536, | |
| "grad_norm": 0.24737343192100525, | |
| "learning_rate": 3.0244963276024786e-05, | |
| "loss": 0.094, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 0.6991790021087976, | |
| "grad_norm": 0.2577686607837677, | |
| "learning_rate": 3.0142382339665997e-05, | |
| "loss": 0.0933, | |
| "step": 17075 | |
| }, | |
| { | |
| "epoch": 0.7002026902524415, | |
| "grad_norm": 0.28968894481658936, | |
| "learning_rate": 3.0039801403307212e-05, | |
| "loss": 0.0965, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.7012263783960854, | |
| "grad_norm": 0.2456517517566681, | |
| "learning_rate": 2.9937220466948423e-05, | |
| "loss": 0.0953, | |
| "step": 17125 | |
| }, | |
| { | |
| "epoch": 0.7022500665397293, | |
| "grad_norm": 0.25714367628097534, | |
| "learning_rate": 2.9834639530589638e-05, | |
| "loss": 0.0888, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 0.7032737546833733, | |
| "grad_norm": 0.2177487164735794, | |
| "learning_rate": 2.973205859423085e-05, | |
| "loss": 0.0917, | |
| "step": 17175 | |
| }, | |
| { | |
| "epoch": 0.7042974428270172, | |
| "grad_norm": 0.20064932107925415, | |
| "learning_rate": 2.962947765787206e-05, | |
| "loss": 0.0899, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.7053211309706611, | |
| "grad_norm": 0.2717735469341278, | |
| "learning_rate": 2.9526896721513275e-05, | |
| "loss": 0.0939, | |
| "step": 17225 | |
| }, | |
| { | |
| "epoch": 0.706344819114305, | |
| "grad_norm": 0.20536677539348602, | |
| "learning_rate": 2.9424315785154487e-05, | |
| "loss": 0.0941, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 0.707368507257949, | |
| "grad_norm": 0.28099992871284485, | |
| "learning_rate": 2.93217348487957e-05, | |
| "loss": 0.0881, | |
| "step": 17275 | |
| }, | |
| { | |
| "epoch": 0.7083921954015928, | |
| "grad_norm": 0.21004413068294525, | |
| "learning_rate": 2.9219153912436913e-05, | |
| "loss": 0.0945, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.7094158835452368, | |
| "grad_norm": 0.24377816915512085, | |
| "learning_rate": 2.9116572976078127e-05, | |
| "loss": 0.0943, | |
| "step": 17325 | |
| }, | |
| { | |
| "epoch": 0.7104395716888807, | |
| "grad_norm": 0.2159167379140854, | |
| "learning_rate": 2.901399203971934e-05, | |
| "loss": 0.0916, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 0.7114632598325247, | |
| "grad_norm": 0.3277469277381897, | |
| "learning_rate": 2.8911411103360553e-05, | |
| "loss": 0.0894, | |
| "step": 17375 | |
| }, | |
| { | |
| "epoch": 0.7124869479761685, | |
| "grad_norm": 0.3423548638820648, | |
| "learning_rate": 2.8808830167001764e-05, | |
| "loss": 0.0922, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.7135106361198125, | |
| "grad_norm": 0.20151039958000183, | |
| "learning_rate": 2.870624923064298e-05, | |
| "loss": 0.0906, | |
| "step": 17425 | |
| }, | |
| { | |
| "epoch": 0.7145343242634564, | |
| "grad_norm": 0.29227256774902344, | |
| "learning_rate": 2.860366829428419e-05, | |
| "loss": 0.0914, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 0.7155580124071003, | |
| "grad_norm": 0.31062838435173035, | |
| "learning_rate": 2.8501087357925405e-05, | |
| "loss": 0.0932, | |
| "step": 17475 | |
| }, | |
| { | |
| "epoch": 0.7165817005507442, | |
| "grad_norm": 0.24426613748073578, | |
| "learning_rate": 2.8398506421566616e-05, | |
| "loss": 0.0938, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.7176053886943882, | |
| "grad_norm": 0.2505645155906677, | |
| "learning_rate": 2.829592548520783e-05, | |
| "loss": 0.0924, | |
| "step": 17525 | |
| }, | |
| { | |
| "epoch": 0.718629076838032, | |
| "grad_norm": 0.21960324048995972, | |
| "learning_rate": 2.8193344548849042e-05, | |
| "loss": 0.0912, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 0.719652764981676, | |
| "grad_norm": 0.25820910930633545, | |
| "learning_rate": 2.8090763612490257e-05, | |
| "loss": 0.0913, | |
| "step": 17575 | |
| }, | |
| { | |
| "epoch": 0.7206764531253199, | |
| "grad_norm": 0.23069611191749573, | |
| "learning_rate": 2.7988182676131468e-05, | |
| "loss": 0.0903, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.7217001412689639, | |
| "grad_norm": 0.2641305923461914, | |
| "learning_rate": 2.7885601739772683e-05, | |
| "loss": 0.0899, | |
| "step": 17625 | |
| }, | |
| { | |
| "epoch": 0.7227238294126077, | |
| "grad_norm": 0.28528881072998047, | |
| "learning_rate": 2.7783020803413894e-05, | |
| "loss": 0.0922, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 0.7237475175562517, | |
| "grad_norm": 0.297124445438385, | |
| "learning_rate": 2.768043986705511e-05, | |
| "loss": 0.0899, | |
| "step": 17675 | |
| }, | |
| { | |
| "epoch": 0.7247712056998956, | |
| "grad_norm": 0.2650444805622101, | |
| "learning_rate": 2.757785893069632e-05, | |
| "loss": 0.0903, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.7257948938435395, | |
| "grad_norm": 0.2515466809272766, | |
| "learning_rate": 2.7475277994337535e-05, | |
| "loss": 0.0943, | |
| "step": 17725 | |
| }, | |
| { | |
| "epoch": 0.7268185819871834, | |
| "grad_norm": 0.29468923807144165, | |
| "learning_rate": 2.7372697057978746e-05, | |
| "loss": 0.0935, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 0.7278422701308274, | |
| "grad_norm": 0.28869664669036865, | |
| "learning_rate": 2.727011612161996e-05, | |
| "loss": 0.0877, | |
| "step": 17775 | |
| }, | |
| { | |
| "epoch": 0.7288659582744713, | |
| "grad_norm": 0.2862752377986908, | |
| "learning_rate": 2.7167535185261172e-05, | |
| "loss": 0.0894, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.7298896464181152, | |
| "grad_norm": 0.4324943721294403, | |
| "learning_rate": 2.7064954248902387e-05, | |
| "loss": 0.0964, | |
| "step": 17825 | |
| }, | |
| { | |
| "epoch": 0.7309133345617591, | |
| "grad_norm": 0.2106688767671585, | |
| "learning_rate": 2.6962373312543598e-05, | |
| "loss": 0.0941, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 0.7319370227054031, | |
| "grad_norm": 0.2924487292766571, | |
| "learning_rate": 2.6859792376184813e-05, | |
| "loss": 0.0895, | |
| "step": 17875 | |
| }, | |
| { | |
| "epoch": 0.7329607108490469, | |
| "grad_norm": 0.21302323043346405, | |
| "learning_rate": 2.6757211439826024e-05, | |
| "loss": 0.0951, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.7339843989926909, | |
| "grad_norm": 0.2614041268825531, | |
| "learning_rate": 2.665463050346724e-05, | |
| "loss": 0.0885, | |
| "step": 17925 | |
| }, | |
| { | |
| "epoch": 0.7350080871363348, | |
| "grad_norm": 0.2530576288700104, | |
| "learning_rate": 2.655204956710845e-05, | |
| "loss": 0.0906, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 0.7360317752799788, | |
| "grad_norm": 0.21055959165096283, | |
| "learning_rate": 2.6449468630749665e-05, | |
| "loss": 0.0905, | |
| "step": 17975 | |
| }, | |
| { | |
| "epoch": 0.7370554634236226, | |
| "grad_norm": 0.23487575352191925, | |
| "learning_rate": 2.6346887694390876e-05, | |
| "loss": 0.0886, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.7380791515672666, | |
| "grad_norm": 0.2657538950443268, | |
| "learning_rate": 2.624430675803209e-05, | |
| "loss": 0.0902, | |
| "step": 18025 | |
| }, | |
| { | |
| "epoch": 0.7391028397109105, | |
| "grad_norm": 0.2803148627281189, | |
| "learning_rate": 2.6141725821673302e-05, | |
| "loss": 0.0914, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 0.7401265278545544, | |
| "grad_norm": 0.29323095083236694, | |
| "learning_rate": 2.6039144885314516e-05, | |
| "loss": 0.0863, | |
| "step": 18075 | |
| }, | |
| { | |
| "epoch": 0.7411502159981983, | |
| "grad_norm": 0.2417263686656952, | |
| "learning_rate": 2.5936563948955728e-05, | |
| "loss": 0.091, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.7421739041418423, | |
| "grad_norm": 0.30392271280288696, | |
| "learning_rate": 2.583398301259694e-05, | |
| "loss": 0.094, | |
| "step": 18125 | |
| }, | |
| { | |
| "epoch": 0.7431975922854861, | |
| "grad_norm": 0.24675561487674713, | |
| "learning_rate": 2.5731402076238154e-05, | |
| "loss": 0.09, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 0.74422128042913, | |
| "grad_norm": 0.28635236620903015, | |
| "learning_rate": 2.5628821139879365e-05, | |
| "loss": 0.0944, | |
| "step": 18175 | |
| }, | |
| { | |
| "epoch": 0.745244968572774, | |
| "grad_norm": 0.3268403112888336, | |
| "learning_rate": 2.552624020352058e-05, | |
| "loss": 0.0914, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.746268656716418, | |
| "grad_norm": 0.32864445447921753, | |
| "learning_rate": 2.542365926716179e-05, | |
| "loss": 0.0956, | |
| "step": 18225 | |
| }, | |
| { | |
| "epoch": 0.7472923448600618, | |
| "grad_norm": 0.2175736427307129, | |
| "learning_rate": 2.5321078330803006e-05, | |
| "loss": 0.0922, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 0.7483160330037057, | |
| "grad_norm": 0.26862508058547974, | |
| "learning_rate": 2.5218497394444217e-05, | |
| "loss": 0.0881, | |
| "step": 18275 | |
| }, | |
| { | |
| "epoch": 0.7493397211473497, | |
| "grad_norm": 0.2962358593940735, | |
| "learning_rate": 2.511591645808543e-05, | |
| "loss": 0.0886, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.7503634092909935, | |
| "grad_norm": 0.21592926979064941, | |
| "learning_rate": 2.5013335521726643e-05, | |
| "loss": 0.0852, | |
| "step": 18325 | |
| }, | |
| { | |
| "epoch": 0.7513870974346375, | |
| "grad_norm": 0.4917377531528473, | |
| "learning_rate": 2.4910754585367857e-05, | |
| "loss": 0.088, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 0.7524107855782814, | |
| "grad_norm": 0.2455429881811142, | |
| "learning_rate": 2.480817364900907e-05, | |
| "loss": 0.0937, | |
| "step": 18375 | |
| }, | |
| { | |
| "epoch": 0.7534344737219254, | |
| "grad_norm": 0.22315055131912231, | |
| "learning_rate": 2.4705592712650283e-05, | |
| "loss": 0.0928, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.7544581618655692, | |
| "grad_norm": 0.2998165190219879, | |
| "learning_rate": 2.4603011776291495e-05, | |
| "loss": 0.0973, | |
| "step": 18425 | |
| }, | |
| { | |
| "epoch": 0.7554818500092132, | |
| "grad_norm": 0.29680758714675903, | |
| "learning_rate": 2.450043083993271e-05, | |
| "loss": 0.0883, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 0.7565055381528571, | |
| "grad_norm": 0.34500744938850403, | |
| "learning_rate": 2.439784990357392e-05, | |
| "loss": 0.0936, | |
| "step": 18475 | |
| }, | |
| { | |
| "epoch": 0.757529226296501, | |
| "grad_norm": 0.2546531856060028, | |
| "learning_rate": 2.4295268967215135e-05, | |
| "loss": 0.0929, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.7585529144401449, | |
| "grad_norm": 0.2985497713088989, | |
| "learning_rate": 2.4192688030856347e-05, | |
| "loss": 0.0931, | |
| "step": 18525 | |
| }, | |
| { | |
| "epoch": 0.7595766025837889, | |
| "grad_norm": 0.21997804939746857, | |
| "learning_rate": 2.409010709449756e-05, | |
| "loss": 0.0922, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 0.7606002907274328, | |
| "grad_norm": 0.33792802691459656, | |
| "learning_rate": 2.3987526158138772e-05, | |
| "loss": 0.0856, | |
| "step": 18575 | |
| }, | |
| { | |
| "epoch": 0.7616239788710767, | |
| "grad_norm": 0.21099922060966492, | |
| "learning_rate": 2.3884945221779987e-05, | |
| "loss": 0.096, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.7626476670147206, | |
| "grad_norm": 0.29002106189727783, | |
| "learning_rate": 2.37823642854212e-05, | |
| "loss": 0.0938, | |
| "step": 18625 | |
| }, | |
| { | |
| "epoch": 0.7636713551583646, | |
| "grad_norm": 0.23993101716041565, | |
| "learning_rate": 2.3679783349062413e-05, | |
| "loss": 0.0875, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 0.7646950433020084, | |
| "grad_norm": 0.2299950271844864, | |
| "learning_rate": 2.3577202412703624e-05, | |
| "loss": 0.0903, | |
| "step": 18675 | |
| }, | |
| { | |
| "epoch": 0.7657187314456524, | |
| "grad_norm": 0.2547556757926941, | |
| "learning_rate": 2.347462147634484e-05, | |
| "loss": 0.0966, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.7667424195892963, | |
| "grad_norm": 0.24056895077228546, | |
| "learning_rate": 2.337204053998605e-05, | |
| "loss": 0.0901, | |
| "step": 18725 | |
| }, | |
| { | |
| "epoch": 0.7677661077329402, | |
| "grad_norm": 0.2962265610694885, | |
| "learning_rate": 2.3269459603627265e-05, | |
| "loss": 0.0941, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 0.7687897958765841, | |
| "grad_norm": 0.3107589781284332, | |
| "learning_rate": 2.3166878667268476e-05, | |
| "loss": 0.0922, | |
| "step": 18775 | |
| }, | |
| { | |
| "epoch": 0.7698134840202281, | |
| "grad_norm": 0.2781747877597809, | |
| "learning_rate": 2.306429773090969e-05, | |
| "loss": 0.0909, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.770837172163872, | |
| "grad_norm": 0.3311710059642792, | |
| "learning_rate": 2.2961716794550902e-05, | |
| "loss": 0.0877, | |
| "step": 18825 | |
| }, | |
| { | |
| "epoch": 0.7718608603075159, | |
| "grad_norm": 0.2895514965057373, | |
| "learning_rate": 2.2859135858192117e-05, | |
| "loss": 0.0973, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 0.7728845484511598, | |
| "grad_norm": 0.24788254499435425, | |
| "learning_rate": 2.2756554921833328e-05, | |
| "loss": 0.0922, | |
| "step": 18875 | |
| }, | |
| { | |
| "epoch": 0.7739082365948038, | |
| "grad_norm": 0.3390001952648163, | |
| "learning_rate": 2.2653973985474543e-05, | |
| "loss": 0.0951, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.7749319247384476, | |
| "grad_norm": 0.3275790214538574, | |
| "learning_rate": 2.2551393049115754e-05, | |
| "loss": 0.0902, | |
| "step": 18925 | |
| }, | |
| { | |
| "epoch": 0.7759556128820916, | |
| "grad_norm": 0.2598778009414673, | |
| "learning_rate": 2.244881211275697e-05, | |
| "loss": 0.0936, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 0.7769793010257355, | |
| "grad_norm": 0.32007846236228943, | |
| "learning_rate": 2.234623117639818e-05, | |
| "loss": 0.093, | |
| "step": 18975 | |
| }, | |
| { | |
| "epoch": 0.7780029891693795, | |
| "grad_norm": 0.25675615668296814, | |
| "learning_rate": 2.2243650240039395e-05, | |
| "loss": 0.097, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.7790266773130233, | |
| "grad_norm": 0.20342758297920227, | |
| "learning_rate": 2.2141069303680606e-05, | |
| "loss": 0.0941, | |
| "step": 19025 | |
| }, | |
| { | |
| "epoch": 0.7800503654566673, | |
| "grad_norm": 0.2361544668674469, | |
| "learning_rate": 2.203848836732182e-05, | |
| "loss": 0.0903, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 0.7810740536003112, | |
| "grad_norm": 0.2677974998950958, | |
| "learning_rate": 2.1935907430963032e-05, | |
| "loss": 0.0938, | |
| "step": 19075 | |
| }, | |
| { | |
| "epoch": 0.7820977417439551, | |
| "grad_norm": 0.3720152676105499, | |
| "learning_rate": 2.1833326494604243e-05, | |
| "loss": 0.0899, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.783121429887599, | |
| "grad_norm": 0.30042845010757446, | |
| "learning_rate": 2.1730745558245458e-05, | |
| "loss": 0.0906, | |
| "step": 19125 | |
| }, | |
| { | |
| "epoch": 0.784145118031243, | |
| "grad_norm": 0.25269991159439087, | |
| "learning_rate": 2.162816462188667e-05, | |
| "loss": 0.0899, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 0.7851688061748869, | |
| "grad_norm": 0.21545687317848206, | |
| "learning_rate": 2.1525583685527884e-05, | |
| "loss": 0.0888, | |
| "step": 19175 | |
| }, | |
| { | |
| "epoch": 0.7861924943185308, | |
| "grad_norm": 0.24490401148796082, | |
| "learning_rate": 2.1423002749169095e-05, | |
| "loss": 0.0899, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.7872161824621747, | |
| "grad_norm": 0.3394610583782196, | |
| "learning_rate": 2.132042181281031e-05, | |
| "loss": 0.0981, | |
| "step": 19225 | |
| }, | |
| { | |
| "epoch": 0.7882398706058187, | |
| "grad_norm": 0.27232640981674194, | |
| "learning_rate": 2.121784087645152e-05, | |
| "loss": 0.0888, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 0.7892635587494625, | |
| "grad_norm": 0.26301074028015137, | |
| "learning_rate": 2.1115259940092736e-05, | |
| "loss": 0.0897, | |
| "step": 19275 | |
| }, | |
| { | |
| "epoch": 0.7902872468931065, | |
| "grad_norm": 0.2940311133861542, | |
| "learning_rate": 2.1012679003733947e-05, | |
| "loss": 0.0912, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.7913109350367504, | |
| "grad_norm": 0.24101464450359344, | |
| "learning_rate": 2.091009806737516e-05, | |
| "loss": 0.0933, | |
| "step": 19325 | |
| }, | |
| { | |
| "epoch": 0.7923346231803943, | |
| "grad_norm": 0.3280772268772125, | |
| "learning_rate": 2.0807517131016373e-05, | |
| "loss": 0.0905, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 0.7933583113240382, | |
| "grad_norm": 0.3161431550979614, | |
| "learning_rate": 2.0704936194657588e-05, | |
| "loss": 0.0901, | |
| "step": 19375 | |
| }, | |
| { | |
| "epoch": 0.7943819994676822, | |
| "grad_norm": 0.28092876076698303, | |
| "learning_rate": 2.06023552582988e-05, | |
| "loss": 0.0902, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.7954056876113261, | |
| "grad_norm": 0.21107934415340424, | |
| "learning_rate": 2.0499774321940014e-05, | |
| "loss": 0.0888, | |
| "step": 19425 | |
| }, | |
| { | |
| "epoch": 0.79642937575497, | |
| "grad_norm": 0.24856053292751312, | |
| "learning_rate": 2.0397193385581225e-05, | |
| "loss": 0.0905, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 0.7974530638986139, | |
| "grad_norm": 0.2561679482460022, | |
| "learning_rate": 2.029461244922244e-05, | |
| "loss": 0.0922, | |
| "step": 19475 | |
| }, | |
| { | |
| "epoch": 0.7984767520422579, | |
| "grad_norm": 0.25557827949523926, | |
| "learning_rate": 2.019203151286365e-05, | |
| "loss": 0.0879, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.7995004401859017, | |
| "grad_norm": 0.2589765787124634, | |
| "learning_rate": 2.0089450576504865e-05, | |
| "loss": 0.0928, | |
| "step": 19525 | |
| }, | |
| { | |
| "epoch": 0.8005241283295457, | |
| "grad_norm": 0.21249115467071533, | |
| "learning_rate": 1.9986869640146077e-05, | |
| "loss": 0.0869, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 0.8015478164731896, | |
| "grad_norm": 0.23621489107608795, | |
| "learning_rate": 1.988428870378729e-05, | |
| "loss": 0.092, | |
| "step": 19575 | |
| }, | |
| { | |
| "epoch": 0.8025715046168336, | |
| "grad_norm": 0.2507089376449585, | |
| "learning_rate": 1.9781707767428503e-05, | |
| "loss": 0.0875, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.8035951927604774, | |
| "grad_norm": 0.28460606932640076, | |
| "learning_rate": 1.9679126831069717e-05, | |
| "loss": 0.0838, | |
| "step": 19625 | |
| }, | |
| { | |
| "epoch": 0.8046188809041214, | |
| "grad_norm": 0.3332251310348511, | |
| "learning_rate": 1.9576545894710925e-05, | |
| "loss": 0.0909, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 0.8056425690477653, | |
| "grad_norm": 0.26824021339416504, | |
| "learning_rate": 1.947396495835214e-05, | |
| "loss": 0.0922, | |
| "step": 19675 | |
| }, | |
| { | |
| "epoch": 0.8066662571914092, | |
| "grad_norm": 0.2643376886844635, | |
| "learning_rate": 1.937138402199335e-05, | |
| "loss": 0.0915, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.8076899453350531, | |
| "grad_norm": 0.29947948455810547, | |
| "learning_rate": 1.9268803085634566e-05, | |
| "loss": 0.0919, | |
| "step": 19725 | |
| }, | |
| { | |
| "epoch": 0.8087136334786971, | |
| "grad_norm": 0.37118449807167053, | |
| "learning_rate": 1.9166222149275777e-05, | |
| "loss": 0.0887, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 0.809737321622341, | |
| "grad_norm": 0.32123562693595886, | |
| "learning_rate": 1.9063641212916992e-05, | |
| "loss": 0.0913, | |
| "step": 19775 | |
| }, | |
| { | |
| "epoch": 0.8107610097659849, | |
| "grad_norm": 0.2964722514152527, | |
| "learning_rate": 1.8961060276558203e-05, | |
| "loss": 0.0915, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.8117846979096288, | |
| "grad_norm": 0.25374674797058105, | |
| "learning_rate": 1.8858479340199418e-05, | |
| "loss": 0.0918, | |
| "step": 19825 | |
| }, | |
| { | |
| "epoch": 0.8128083860532728, | |
| "grad_norm": 0.30407896637916565, | |
| "learning_rate": 1.875589840384063e-05, | |
| "loss": 0.0934, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 0.8138320741969166, | |
| "grad_norm": 0.284839928150177, | |
| "learning_rate": 1.8653317467481844e-05, | |
| "loss": 0.0868, | |
| "step": 19875 | |
| }, | |
| { | |
| "epoch": 0.8148557623405606, | |
| "grad_norm": 0.27440112829208374, | |
| "learning_rate": 1.8550736531123055e-05, | |
| "loss": 0.0981, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.8158794504842045, | |
| "grad_norm": 0.293817400932312, | |
| "learning_rate": 1.844815559476427e-05, | |
| "loss": 0.0937, | |
| "step": 19925 | |
| }, | |
| { | |
| "epoch": 0.8169031386278484, | |
| "grad_norm": 0.25099506974220276, | |
| "learning_rate": 1.834557465840548e-05, | |
| "loss": 0.0907, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 0.8179268267714923, | |
| "grad_norm": 0.2696509063243866, | |
| "learning_rate": 1.8242993722046696e-05, | |
| "loss": 0.0898, | |
| "step": 19975 | |
| }, | |
| { | |
| "epoch": 0.8189505149151363, | |
| "grad_norm": 0.23524117469787598, | |
| "learning_rate": 1.8140412785687907e-05, | |
| "loss": 0.0834, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.8199742030587802, | |
| "grad_norm": 0.28562095761299133, | |
| "learning_rate": 1.803783184932912e-05, | |
| "loss": 0.0949, | |
| "step": 20025 | |
| }, | |
| { | |
| "epoch": 0.8209978912024241, | |
| "grad_norm": 0.3326290249824524, | |
| "learning_rate": 1.7935250912970333e-05, | |
| "loss": 0.086, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 0.822021579346068, | |
| "grad_norm": 0.335920125246048, | |
| "learning_rate": 1.7832669976611548e-05, | |
| "loss": 0.0898, | |
| "step": 20075 | |
| }, | |
| { | |
| "epoch": 0.823045267489712, | |
| "grad_norm": 0.23107844591140747, | |
| "learning_rate": 1.773008904025276e-05, | |
| "loss": 0.0911, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.8240689556333558, | |
| "grad_norm": 0.2805933356285095, | |
| "learning_rate": 1.7627508103893973e-05, | |
| "loss": 0.0903, | |
| "step": 20125 | |
| }, | |
| { | |
| "epoch": 0.8250926437769998, | |
| "grad_norm": 0.2637193500995636, | |
| "learning_rate": 1.7524927167535185e-05, | |
| "loss": 0.0934, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 0.8261163319206437, | |
| "grad_norm": 0.25126680731773376, | |
| "learning_rate": 1.74223462311764e-05, | |
| "loss": 0.0967, | |
| "step": 20175 | |
| }, | |
| { | |
| "epoch": 0.8271400200642877, | |
| "grad_norm": 0.21200938522815704, | |
| "learning_rate": 1.731976529481761e-05, | |
| "loss": 0.0879, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.8281637082079315, | |
| "grad_norm": 0.2675575017929077, | |
| "learning_rate": 1.7217184358458825e-05, | |
| "loss": 0.0933, | |
| "step": 20225 | |
| }, | |
| { | |
| "epoch": 0.8291873963515755, | |
| "grad_norm": 0.24949528276920319, | |
| "learning_rate": 1.7114603422100037e-05, | |
| "loss": 0.0834, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 0.8302110844952194, | |
| "grad_norm": 0.31639212369918823, | |
| "learning_rate": 1.701202248574125e-05, | |
| "loss": 0.0862, | |
| "step": 20275 | |
| }, | |
| { | |
| "epoch": 0.8312347726388633, | |
| "grad_norm": 0.31430932879447937, | |
| "learning_rate": 1.6909441549382463e-05, | |
| "loss": 0.0895, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.8322584607825072, | |
| "grad_norm": 0.2188422530889511, | |
| "learning_rate": 1.6806860613023674e-05, | |
| "loss": 0.0866, | |
| "step": 20325 | |
| }, | |
| { | |
| "epoch": 0.8332821489261512, | |
| "grad_norm": 0.26949557662010193, | |
| "learning_rate": 1.670427967666489e-05, | |
| "loss": 0.0874, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 0.8343058370697951, | |
| "grad_norm": 0.2512851655483246, | |
| "learning_rate": 1.66016987403061e-05, | |
| "loss": 0.0886, | |
| "step": 20375 | |
| }, | |
| { | |
| "epoch": 0.835329525213439, | |
| "grad_norm": 0.21398603916168213, | |
| "learning_rate": 1.6499117803947314e-05, | |
| "loss": 0.0901, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.8363532133570829, | |
| "grad_norm": 0.3579723834991455, | |
| "learning_rate": 1.6396536867588526e-05, | |
| "loss": 0.089, | |
| "step": 20425 | |
| }, | |
| { | |
| "epoch": 0.8373769015007269, | |
| "grad_norm": 0.25546953082084656, | |
| "learning_rate": 1.629395593122974e-05, | |
| "loss": 0.09, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 0.8384005896443707, | |
| "grad_norm": 0.30521437525749207, | |
| "learning_rate": 1.6191374994870952e-05, | |
| "loss": 0.0878, | |
| "step": 20475 | |
| }, | |
| { | |
| "epoch": 0.8394242777880146, | |
| "grad_norm": 0.25270193815231323, | |
| "learning_rate": 1.6088794058512166e-05, | |
| "loss": 0.0871, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.8404479659316586, | |
| "grad_norm": 0.31624093651771545, | |
| "learning_rate": 1.5986213122153378e-05, | |
| "loss": 0.0872, | |
| "step": 20525 | |
| }, | |
| { | |
| "epoch": 0.8414716540753026, | |
| "grad_norm": 0.3739725947380066, | |
| "learning_rate": 1.5883632185794592e-05, | |
| "loss": 0.0864, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 0.8424953422189464, | |
| "grad_norm": 0.25170573592185974, | |
| "learning_rate": 1.5781051249435804e-05, | |
| "loss": 0.0927, | |
| "step": 20575 | |
| }, | |
| { | |
| "epoch": 0.8435190303625903, | |
| "grad_norm": 0.24413146078586578, | |
| "learning_rate": 1.5678470313077018e-05, | |
| "loss": 0.0878, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.8445427185062343, | |
| "grad_norm": 0.26711735129356384, | |
| "learning_rate": 1.557588937671823e-05, | |
| "loss": 0.0898, | |
| "step": 20625 | |
| }, | |
| { | |
| "epoch": 0.8455664066498781, | |
| "grad_norm": 0.2967755198478699, | |
| "learning_rate": 1.5473308440359444e-05, | |
| "loss": 0.093, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 0.8465900947935221, | |
| "grad_norm": 0.25452178716659546, | |
| "learning_rate": 1.5370727504000655e-05, | |
| "loss": 0.088, | |
| "step": 20675 | |
| }, | |
| { | |
| "epoch": 0.847613782937166, | |
| "grad_norm": 0.22610174119472504, | |
| "learning_rate": 1.526814656764187e-05, | |
| "loss": 0.0844, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.8486374710808099, | |
| "grad_norm": 0.2170991748571396, | |
| "learning_rate": 1.5165565631283083e-05, | |
| "loss": 0.0884, | |
| "step": 20725 | |
| }, | |
| { | |
| "epoch": 0.8496611592244538, | |
| "grad_norm": 0.2881997227668762, | |
| "learning_rate": 1.5062984694924296e-05, | |
| "loss": 0.0935, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 0.8506848473680978, | |
| "grad_norm": 0.2766591012477875, | |
| "learning_rate": 1.4960403758565509e-05, | |
| "loss": 0.0874, | |
| "step": 20775 | |
| }, | |
| { | |
| "epoch": 0.8517085355117417, | |
| "grad_norm": 0.2786926329135895, | |
| "learning_rate": 1.485782282220672e-05, | |
| "loss": 0.0892, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.8527322236553856, | |
| "grad_norm": 0.22950054705142975, | |
| "learning_rate": 1.4755241885847933e-05, | |
| "loss": 0.089, | |
| "step": 20825 | |
| }, | |
| { | |
| "epoch": 0.8537559117990295, | |
| "grad_norm": 0.43880143761634827, | |
| "learning_rate": 1.4652660949489146e-05, | |
| "loss": 0.0888, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 0.8547795999426735, | |
| "grad_norm": 0.24918793141841888, | |
| "learning_rate": 1.455008001313036e-05, | |
| "loss": 0.0924, | |
| "step": 20875 | |
| }, | |
| { | |
| "epoch": 0.8558032880863173, | |
| "grad_norm": 0.26215484738349915, | |
| "learning_rate": 1.4447499076771572e-05, | |
| "loss": 0.0903, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.8568269762299613, | |
| "grad_norm": 0.2752866744995117, | |
| "learning_rate": 1.4344918140412785e-05, | |
| "loss": 0.0916, | |
| "step": 20925 | |
| }, | |
| { | |
| "epoch": 0.8578506643736052, | |
| "grad_norm": 0.2551786005496979, | |
| "learning_rate": 1.4242337204053998e-05, | |
| "loss": 0.0887, | |
| "step": 20950 | |
| }, | |
| { | |
| "epoch": 0.8588743525172492, | |
| "grad_norm": 0.2203332632780075, | |
| "learning_rate": 1.4139756267695211e-05, | |
| "loss": 0.086, | |
| "step": 20975 | |
| }, | |
| { | |
| "epoch": 0.859898040660893, | |
| "grad_norm": 0.25602227449417114, | |
| "learning_rate": 1.4037175331336424e-05, | |
| "loss": 0.0927, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.860921728804537, | |
| "grad_norm": 0.27257677912712097, | |
| "learning_rate": 1.3934594394977637e-05, | |
| "loss": 0.095, | |
| "step": 21025 | |
| }, | |
| { | |
| "epoch": 0.8619454169481809, | |
| "grad_norm": 0.24853083491325378, | |
| "learning_rate": 1.383201345861885e-05, | |
| "loss": 0.0896, | |
| "step": 21050 | |
| }, | |
| { | |
| "epoch": 0.8629691050918248, | |
| "grad_norm": 0.22490383684635162, | |
| "learning_rate": 1.3729432522260063e-05, | |
| "loss": 0.089, | |
| "step": 21075 | |
| }, | |
| { | |
| "epoch": 0.8639927932354687, | |
| "grad_norm": 0.25305449962615967, | |
| "learning_rate": 1.3626851585901276e-05, | |
| "loss": 0.0879, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.8650164813791127, | |
| "grad_norm": 0.31005653738975525, | |
| "learning_rate": 1.3524270649542489e-05, | |
| "loss": 0.0927, | |
| "step": 21125 | |
| }, | |
| { | |
| "epoch": 0.8660401695227566, | |
| "grad_norm": 0.24999596178531647, | |
| "learning_rate": 1.3421689713183702e-05, | |
| "loss": 0.089, | |
| "step": 21150 | |
| }, | |
| { | |
| "epoch": 0.8670638576664005, | |
| "grad_norm": 0.23844856023788452, | |
| "learning_rate": 1.3319108776824915e-05, | |
| "loss": 0.0846, | |
| "step": 21175 | |
| }, | |
| { | |
| "epoch": 0.8680875458100444, | |
| "grad_norm": 0.2782473564147949, | |
| "learning_rate": 1.3216527840466128e-05, | |
| "loss": 0.0931, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.8691112339536884, | |
| "grad_norm": 0.22946637868881226, | |
| "learning_rate": 1.3113946904107341e-05, | |
| "loss": 0.0881, | |
| "step": 21225 | |
| }, | |
| { | |
| "epoch": 0.8701349220973322, | |
| "grad_norm": 0.28429850935935974, | |
| "learning_rate": 1.3011365967748554e-05, | |
| "loss": 0.0881, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 0.8711586102409762, | |
| "grad_norm": 0.39349105954170227, | |
| "learning_rate": 1.2908785031389767e-05, | |
| "loss": 0.0914, | |
| "step": 21275 | |
| }, | |
| { | |
| "epoch": 0.8721822983846201, | |
| "grad_norm": 0.3252253234386444, | |
| "learning_rate": 1.280620409503098e-05, | |
| "loss": 0.0914, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.873205986528264, | |
| "grad_norm": 0.2974836528301239, | |
| "learning_rate": 1.2703623158672193e-05, | |
| "loss": 0.0924, | |
| "step": 21325 | |
| }, | |
| { | |
| "epoch": 0.8742296746719079, | |
| "grad_norm": 0.27263307571411133, | |
| "learning_rate": 1.2601042222313406e-05, | |
| "loss": 0.092, | |
| "step": 21350 | |
| }, | |
| { | |
| "epoch": 0.8752533628155519, | |
| "grad_norm": 0.34150230884552, | |
| "learning_rate": 1.2498461285954619e-05, | |
| "loss": 0.0909, | |
| "step": 21375 | |
| }, | |
| { | |
| "epoch": 0.8762770509591958, | |
| "grad_norm": 0.27397677302360535, | |
| "learning_rate": 1.2395880349595832e-05, | |
| "loss": 0.091, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.8773007391028397, | |
| "grad_norm": 0.28834134340286255, | |
| "learning_rate": 1.2293299413237045e-05, | |
| "loss": 0.0951, | |
| "step": 21425 | |
| }, | |
| { | |
| "epoch": 0.8783244272464836, | |
| "grad_norm": 0.2486167699098587, | |
| "learning_rate": 1.2190718476878258e-05, | |
| "loss": 0.0838, | |
| "step": 21450 | |
| }, | |
| { | |
| "epoch": 0.8793481153901276, | |
| "grad_norm": 0.3068005442619324, | |
| "learning_rate": 1.208813754051947e-05, | |
| "loss": 0.0897, | |
| "step": 21475 | |
| }, | |
| { | |
| "epoch": 0.8803718035337714, | |
| "grad_norm": 0.2985325753688812, | |
| "learning_rate": 1.1985556604160684e-05, | |
| "loss": 0.0897, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.8813954916774154, | |
| "grad_norm": 0.2797314524650574, | |
| "learning_rate": 1.1882975667801897e-05, | |
| "loss": 0.0907, | |
| "step": 21525 | |
| }, | |
| { | |
| "epoch": 0.8824191798210593, | |
| "grad_norm": 0.22625084221363068, | |
| "learning_rate": 1.178039473144311e-05, | |
| "loss": 0.0898, | |
| "step": 21550 | |
| }, | |
| { | |
| "epoch": 0.8834428679647033, | |
| "grad_norm": 0.23003660142421722, | |
| "learning_rate": 1.1677813795084323e-05, | |
| "loss": 0.0896, | |
| "step": 21575 | |
| }, | |
| { | |
| "epoch": 0.8844665561083471, | |
| "grad_norm": 0.2965420186519623, | |
| "learning_rate": 1.1575232858725536e-05, | |
| "loss": 0.0889, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.8854902442519911, | |
| "grad_norm": 0.332224577665329, | |
| "learning_rate": 1.1472651922366748e-05, | |
| "loss": 0.0886, | |
| "step": 21625 | |
| }, | |
| { | |
| "epoch": 0.886513932395635, | |
| "grad_norm": 0.27045127749443054, | |
| "learning_rate": 1.1370070986007961e-05, | |
| "loss": 0.0899, | |
| "step": 21650 | |
| }, | |
| { | |
| "epoch": 0.8875376205392789, | |
| "grad_norm": 0.26024821400642395, | |
| "learning_rate": 1.1267490049649174e-05, | |
| "loss": 0.0939, | |
| "step": 21675 | |
| }, | |
| { | |
| "epoch": 0.8885613086829228, | |
| "grad_norm": 0.2873280942440033, | |
| "learning_rate": 1.1164909113290387e-05, | |
| "loss": 0.0912, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.8895849968265668, | |
| "grad_norm": 0.2818579077720642, | |
| "learning_rate": 1.1062328176931599e-05, | |
| "loss": 0.0909, | |
| "step": 21725 | |
| }, | |
| { | |
| "epoch": 0.8906086849702107, | |
| "grad_norm": 0.33922845125198364, | |
| "learning_rate": 1.0959747240572812e-05, | |
| "loss": 0.0859, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 0.8916323731138546, | |
| "grad_norm": 0.3189659118652344, | |
| "learning_rate": 1.0857166304214025e-05, | |
| "loss": 0.0874, | |
| "step": 21775 | |
| }, | |
| { | |
| "epoch": 0.8926560612574985, | |
| "grad_norm": 0.2925044000148773, | |
| "learning_rate": 1.0754585367855238e-05, | |
| "loss": 0.0874, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.8936797494011425, | |
| "grad_norm": 0.36518415808677673, | |
| "learning_rate": 1.065200443149645e-05, | |
| "loss": 0.0905, | |
| "step": 21825 | |
| }, | |
| { | |
| "epoch": 0.8947034375447863, | |
| "grad_norm": 0.29783540964126587, | |
| "learning_rate": 1.0549423495137664e-05, | |
| "loss": 0.0851, | |
| "step": 21850 | |
| }, | |
| { | |
| "epoch": 0.8957271256884303, | |
| "grad_norm": 0.23640768229961395, | |
| "learning_rate": 1.0446842558778877e-05, | |
| "loss": 0.0901, | |
| "step": 21875 | |
| }, | |
| { | |
| "epoch": 0.8967508138320742, | |
| "grad_norm": 0.26059839129447937, | |
| "learning_rate": 1.034426162242009e-05, | |
| "loss": 0.0903, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.8977745019757181, | |
| "grad_norm": 0.3090721368789673, | |
| "learning_rate": 1.0241680686061302e-05, | |
| "loss": 0.0921, | |
| "step": 21925 | |
| }, | |
| { | |
| "epoch": 0.898798190119362, | |
| "grad_norm": 0.3036380112171173, | |
| "learning_rate": 1.0139099749702515e-05, | |
| "loss": 0.0902, | |
| "step": 21950 | |
| }, | |
| { | |
| "epoch": 0.899821878263006, | |
| "grad_norm": 0.27495357394218445, | |
| "learning_rate": 1.0036518813343728e-05, | |
| "loss": 0.0855, | |
| "step": 21975 | |
| }, | |
| { | |
| "epoch": 0.9008455664066499, | |
| "grad_norm": 0.27286654710769653, | |
| "learning_rate": 9.933937876984941e-06, | |
| "loss": 0.0928, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.9018692545502938, | |
| "grad_norm": 0.27504658699035645, | |
| "learning_rate": 9.831356940626154e-06, | |
| "loss": 0.0905, | |
| "step": 22025 | |
| }, | |
| { | |
| "epoch": 0.9028929426939377, | |
| "grad_norm": 0.25373876094818115, | |
| "learning_rate": 9.728776004267367e-06, | |
| "loss": 0.0911, | |
| "step": 22050 | |
| }, | |
| { | |
| "epoch": 0.9039166308375817, | |
| "grad_norm": 0.2752918601036072, | |
| "learning_rate": 9.62619506790858e-06, | |
| "loss": 0.0897, | |
| "step": 22075 | |
| }, | |
| { | |
| "epoch": 0.9049403189812255, | |
| "grad_norm": 0.28456592559814453, | |
| "learning_rate": 9.523614131549793e-06, | |
| "loss": 0.085, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.9059640071248695, | |
| "grad_norm": 0.2836301028728485, | |
| "learning_rate": 9.421033195191006e-06, | |
| "loss": 0.0879, | |
| "step": 22125 | |
| }, | |
| { | |
| "epoch": 0.9069876952685134, | |
| "grad_norm": 0.2792745530605316, | |
| "learning_rate": 9.31845225883222e-06, | |
| "loss": 0.086, | |
| "step": 22150 | |
| }, | |
| { | |
| "epoch": 0.9080113834121574, | |
| "grad_norm": 0.2640101909637451, | |
| "learning_rate": 9.215871322473432e-06, | |
| "loss": 0.0942, | |
| "step": 22175 | |
| }, | |
| { | |
| "epoch": 0.9090350715558012, | |
| "grad_norm": 0.28286224603652954, | |
| "learning_rate": 9.113290386114645e-06, | |
| "loss": 0.0868, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.9100587596994452, | |
| "grad_norm": 0.3581150770187378, | |
| "learning_rate": 9.010709449755858e-06, | |
| "loss": 0.092, | |
| "step": 22225 | |
| }, | |
| { | |
| "epoch": 0.9110824478430891, | |
| "grad_norm": 0.2819570302963257, | |
| "learning_rate": 8.908128513397071e-06, | |
| "loss": 0.092, | |
| "step": 22250 | |
| }, | |
| { | |
| "epoch": 0.912106135986733, | |
| "grad_norm": 0.2538643777370453, | |
| "learning_rate": 8.805547577038284e-06, | |
| "loss": 0.0922, | |
| "step": 22275 | |
| }, | |
| { | |
| "epoch": 0.9131298241303769, | |
| "grad_norm": 0.2901509404182434, | |
| "learning_rate": 8.702966640679497e-06, | |
| "loss": 0.0862, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.9141535122740209, | |
| "grad_norm": 0.28954175114631653, | |
| "learning_rate": 8.60038570432071e-06, | |
| "loss": 0.0879, | |
| "step": 22325 | |
| }, | |
| { | |
| "epoch": 0.9151772004176648, | |
| "grad_norm": 0.26981502771377563, | |
| "learning_rate": 8.497804767961923e-06, | |
| "loss": 0.0889, | |
| "step": 22350 | |
| }, | |
| { | |
| "epoch": 0.9162008885613087, | |
| "grad_norm": 0.3008342683315277, | |
| "learning_rate": 8.395223831603136e-06, | |
| "loss": 0.088, | |
| "step": 22375 | |
| }, | |
| { | |
| "epoch": 0.9172245767049526, | |
| "grad_norm": 0.23977133631706238, | |
| "learning_rate": 8.292642895244349e-06, | |
| "loss": 0.0896, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.9182482648485966, | |
| "grad_norm": 0.21286515891551971, | |
| "learning_rate": 8.190061958885562e-06, | |
| "loss": 0.0933, | |
| "step": 22425 | |
| }, | |
| { | |
| "epoch": 0.9192719529922404, | |
| "grad_norm": 0.3176520764827728, | |
| "learning_rate": 8.087481022526775e-06, | |
| "loss": 0.0898, | |
| "step": 22450 | |
| }, | |
| { | |
| "epoch": 0.9202956411358844, | |
| "grad_norm": 0.2136741727590561, | |
| "learning_rate": 7.984900086167988e-06, | |
| "loss": 0.0911, | |
| "step": 22475 | |
| }, | |
| { | |
| "epoch": 0.9213193292795283, | |
| "grad_norm": 0.32107657194137573, | |
| "learning_rate": 7.882319149809201e-06, | |
| "loss": 0.0874, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.9223430174231722, | |
| "grad_norm": 0.2349776327610016, | |
| "learning_rate": 7.779738213450414e-06, | |
| "loss": 0.0867, | |
| "step": 22525 | |
| }, | |
| { | |
| "epoch": 0.9233667055668161, | |
| "grad_norm": 0.2386864870786667, | |
| "learning_rate": 7.677157277091627e-06, | |
| "loss": 0.0878, | |
| "step": 22550 | |
| }, | |
| { | |
| "epoch": 0.9243903937104601, | |
| "grad_norm": 0.270991712808609, | |
| "learning_rate": 7.574576340732839e-06, | |
| "loss": 0.0837, | |
| "step": 22575 | |
| }, | |
| { | |
| "epoch": 0.925414081854104, | |
| "grad_norm": 0.33399784564971924, | |
| "learning_rate": 7.471995404374052e-06, | |
| "loss": 0.0901, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.9264377699977479, | |
| "grad_norm": 0.2850496470928192, | |
| "learning_rate": 7.369414468015265e-06, | |
| "loss": 0.0898, | |
| "step": 22625 | |
| }, | |
| { | |
| "epoch": 0.9274614581413918, | |
| "grad_norm": 0.32937246561050415, | |
| "learning_rate": 7.266833531656478e-06, | |
| "loss": 0.0903, | |
| "step": 22650 | |
| }, | |
| { | |
| "epoch": 0.9284851462850358, | |
| "grad_norm": 0.22164273262023926, | |
| "learning_rate": 7.164252595297691e-06, | |
| "loss": 0.0928, | |
| "step": 22675 | |
| }, | |
| { | |
| "epoch": 0.9295088344286796, | |
| "grad_norm": 0.2599170506000519, | |
| "learning_rate": 7.061671658938904e-06, | |
| "loss": 0.0874, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.9305325225723236, | |
| "grad_norm": 0.3116656243801117, | |
| "learning_rate": 6.959090722580117e-06, | |
| "loss": 0.0845, | |
| "step": 22725 | |
| }, | |
| { | |
| "epoch": 0.9315562107159675, | |
| "grad_norm": 0.27648812532424927, | |
| "learning_rate": 6.85650978622133e-06, | |
| "loss": 0.0866, | |
| "step": 22750 | |
| }, | |
| { | |
| "epoch": 0.9325798988596115, | |
| "grad_norm": 0.26359742879867554, | |
| "learning_rate": 6.753928849862543e-06, | |
| "loss": 0.0884, | |
| "step": 22775 | |
| }, | |
| { | |
| "epoch": 0.9336035870032553, | |
| "grad_norm": 0.26720476150512695, | |
| "learning_rate": 6.651347913503756e-06, | |
| "loss": 0.0867, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.9346272751468993, | |
| "grad_norm": 0.2515944540500641, | |
| "learning_rate": 6.548766977144969e-06, | |
| "loss": 0.0883, | |
| "step": 22825 | |
| }, | |
| { | |
| "epoch": 0.9356509632905432, | |
| "grad_norm": 0.23396004736423492, | |
| "learning_rate": 6.446186040786182e-06, | |
| "loss": 0.0883, | |
| "step": 22850 | |
| }, | |
| { | |
| "epoch": 0.936674651434187, | |
| "grad_norm": 0.2513067424297333, | |
| "learning_rate": 6.343605104427394e-06, | |
| "loss": 0.0868, | |
| "step": 22875 | |
| }, | |
| { | |
| "epoch": 0.937698339577831, | |
| "grad_norm": 0.29367002844810486, | |
| "learning_rate": 6.241024168068607e-06, | |
| "loss": 0.0932, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.938722027721475, | |
| "grad_norm": 0.2306540161371231, | |
| "learning_rate": 6.13844323170982e-06, | |
| "loss": 0.0918, | |
| "step": 22925 | |
| }, | |
| { | |
| "epoch": 0.9397457158651189, | |
| "grad_norm": 0.27428171038627625, | |
| "learning_rate": 6.035862295351033e-06, | |
| "loss": 0.0881, | |
| "step": 22950 | |
| }, | |
| { | |
| "epoch": 0.9407694040087627, | |
| "grad_norm": 0.3886117935180664, | |
| "learning_rate": 5.933281358992245e-06, | |
| "loss": 0.087, | |
| "step": 22975 | |
| }, | |
| { | |
| "epoch": 0.9417930921524067, | |
| "grad_norm": 0.25603532791137695, | |
| "learning_rate": 5.830700422633458e-06, | |
| "loss": 0.0856, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.9428167802960506, | |
| "grad_norm": 0.30329135060310364, | |
| "learning_rate": 5.728119486274671e-06, | |
| "loss": 0.093, | |
| "step": 23025 | |
| }, | |
| { | |
| "epoch": 0.9438404684396945, | |
| "grad_norm": 0.26778334379196167, | |
| "learning_rate": 5.625538549915884e-06, | |
| "loss": 0.0896, | |
| "step": 23050 | |
| }, | |
| { | |
| "epoch": 0.9448641565833384, | |
| "grad_norm": 0.28244808316230774, | |
| "learning_rate": 5.522957613557097e-06, | |
| "loss": 0.0895, | |
| "step": 23075 | |
| }, | |
| { | |
| "epoch": 0.9458878447269824, | |
| "grad_norm": 0.353553831577301, | |
| "learning_rate": 5.42037667719831e-06, | |
| "loss": 0.0918, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.9469115328706262, | |
| "grad_norm": 0.3107817769050598, | |
| "learning_rate": 5.317795740839523e-06, | |
| "loss": 0.0929, | |
| "step": 23125 | |
| }, | |
| { | |
| "epoch": 0.9479352210142702, | |
| "grad_norm": 0.2637424170970917, | |
| "learning_rate": 5.215214804480736e-06, | |
| "loss": 0.0907, | |
| "step": 23150 | |
| }, | |
| { | |
| "epoch": 0.9489589091579141, | |
| "grad_norm": 0.2971089780330658, | |
| "learning_rate": 5.112633868121949e-06, | |
| "loss": 0.09, | |
| "step": 23175 | |
| }, | |
| { | |
| "epoch": 0.9499825973015581, | |
| "grad_norm": 0.22394390404224396, | |
| "learning_rate": 5.010052931763162e-06, | |
| "loss": 0.0903, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.9510062854452019, | |
| "grad_norm": 0.2777024805545807, | |
| "learning_rate": 4.9074719954043746e-06, | |
| "loss": 0.0873, | |
| "step": 23225 | |
| }, | |
| { | |
| "epoch": 0.9520299735888459, | |
| "grad_norm": 0.24165412783622742, | |
| "learning_rate": 4.8048910590455875e-06, | |
| "loss": 0.0911, | |
| "step": 23250 | |
| }, | |
| { | |
| "epoch": 0.9530536617324898, | |
| "grad_norm": 0.2876558005809784, | |
| "learning_rate": 4.7023101226868005e-06, | |
| "loss": 0.0904, | |
| "step": 23275 | |
| }, | |
| { | |
| "epoch": 0.9540773498761337, | |
| "grad_norm": 0.2749604284763336, | |
| "learning_rate": 4.5997291863280135e-06, | |
| "loss": 0.0894, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.9551010380197776, | |
| "grad_norm": 0.2758445143699646, | |
| "learning_rate": 4.4971482499692265e-06, | |
| "loss": 0.0899, | |
| "step": 23325 | |
| }, | |
| { | |
| "epoch": 0.9561247261634216, | |
| "grad_norm": 0.20477938652038574, | |
| "learning_rate": 4.3945673136104394e-06, | |
| "loss": 0.0878, | |
| "step": 23350 | |
| }, | |
| { | |
| "epoch": 0.9571484143070655, | |
| "grad_norm": 0.3615458607673645, | |
| "learning_rate": 4.291986377251652e-06, | |
| "loss": 0.0858, | |
| "step": 23375 | |
| }, | |
| { | |
| "epoch": 0.9581721024507094, | |
| "grad_norm": 0.28123295307159424, | |
| "learning_rate": 4.189405440892865e-06, | |
| "loss": 0.087, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.9591957905943533, | |
| "grad_norm": 0.30753856897354126, | |
| "learning_rate": 4.0868245045340775e-06, | |
| "loss": 0.0896, | |
| "step": 23425 | |
| }, | |
| { | |
| "epoch": 0.9602194787379973, | |
| "grad_norm": 0.31176239252090454, | |
| "learning_rate": 3.9842435681752905e-06, | |
| "loss": 0.0884, | |
| "step": 23450 | |
| }, | |
| { | |
| "epoch": 0.9612431668816411, | |
| "grad_norm": 0.29048678278923035, | |
| "learning_rate": 3.8816626318165035e-06, | |
| "loss": 0.0818, | |
| "step": 23475 | |
| }, | |
| { | |
| "epoch": 0.9622668550252851, | |
| "grad_norm": 0.2853899896144867, | |
| "learning_rate": 3.779081695457716e-06, | |
| "loss": 0.088, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.963290543168929, | |
| "grad_norm": 0.2238619327545166, | |
| "learning_rate": 3.676500759098929e-06, | |
| "loss": 0.0901, | |
| "step": 23525 | |
| }, | |
| { | |
| "epoch": 0.964314231312573, | |
| "grad_norm": 0.44698524475097656, | |
| "learning_rate": 3.573919822740142e-06, | |
| "loss": 0.0856, | |
| "step": 23550 | |
| }, | |
| { | |
| "epoch": 0.9653379194562168, | |
| "grad_norm": 0.3152156174182892, | |
| "learning_rate": 3.471338886381355e-06, | |
| "loss": 0.0931, | |
| "step": 23575 | |
| }, | |
| { | |
| "epoch": 0.9663616075998608, | |
| "grad_norm": 0.27057376503944397, | |
| "learning_rate": 3.368757950022568e-06, | |
| "loss": 0.0901, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.9673852957435047, | |
| "grad_norm": 0.29068031907081604, | |
| "learning_rate": 3.2661770136637804e-06, | |
| "loss": 0.0893, | |
| "step": 23625 | |
| }, | |
| { | |
| "epoch": 0.9684089838871486, | |
| "grad_norm": 0.2840330898761749, | |
| "learning_rate": 3.1635960773049934e-06, | |
| "loss": 0.0888, | |
| "step": 23650 | |
| }, | |
| { | |
| "epoch": 0.9694326720307925, | |
| "grad_norm": 0.31523531675338745, | |
| "learning_rate": 3.061015140946207e-06, | |
| "loss": 0.0875, | |
| "step": 23675 | |
| }, | |
| { | |
| "epoch": 0.9704563601744365, | |
| "grad_norm": 0.2516843378543854, | |
| "learning_rate": 2.95843420458742e-06, | |
| "loss": 0.0915, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.9714800483180803, | |
| "grad_norm": 0.26545655727386475, | |
| "learning_rate": 2.8558532682286323e-06, | |
| "loss": 0.0848, | |
| "step": 23725 | |
| }, | |
| { | |
| "epoch": 0.9725037364617243, | |
| "grad_norm": 0.300320565700531, | |
| "learning_rate": 2.7532723318698453e-06, | |
| "loss": 0.0886, | |
| "step": 23750 | |
| }, | |
| { | |
| "epoch": 0.9735274246053682, | |
| "grad_norm": 0.3137941360473633, | |
| "learning_rate": 2.6506913955110583e-06, | |
| "loss": 0.0872, | |
| "step": 23775 | |
| }, | |
| { | |
| "epoch": 0.9745511127490122, | |
| "grad_norm": 0.31483328342437744, | |
| "learning_rate": 2.5481104591522713e-06, | |
| "loss": 0.0895, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.975574800892656, | |
| "grad_norm": 0.28136733174324036, | |
| "learning_rate": 2.4455295227934842e-06, | |
| "loss": 0.0894, | |
| "step": 23825 | |
| }, | |
| { | |
| "epoch": 0.9765984890363, | |
| "grad_norm": 0.24842825531959534, | |
| "learning_rate": 2.342948586434697e-06, | |
| "loss": 0.0891, | |
| "step": 23850 | |
| }, | |
| { | |
| "epoch": 0.9776221771799439, | |
| "grad_norm": 0.29128360748291016, | |
| "learning_rate": 2.2403676500759098e-06, | |
| "loss": 0.0939, | |
| "step": 23875 | |
| }, | |
| { | |
| "epoch": 0.9786458653235878, | |
| "grad_norm": 0.27355626225471497, | |
| "learning_rate": 2.1377867137171227e-06, | |
| "loss": 0.0861, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.9796695534672317, | |
| "grad_norm": 0.424562931060791, | |
| "learning_rate": 2.0352057773583357e-06, | |
| "loss": 0.0888, | |
| "step": 23925 | |
| }, | |
| { | |
| "epoch": 0.9806932416108757, | |
| "grad_norm": 0.3024253845214844, | |
| "learning_rate": 1.9326248409995487e-06, | |
| "loss": 0.0889, | |
| "step": 23950 | |
| }, | |
| { | |
| "epoch": 0.9817169297545196, | |
| "grad_norm": 0.3305220603942871, | |
| "learning_rate": 1.8300439046407617e-06, | |
| "loss": 0.091, | |
| "step": 23975 | |
| }, | |
| { | |
| "epoch": 0.9827406178981635, | |
| "grad_norm": 0.29790905117988586, | |
| "learning_rate": 1.7274629682819746e-06, | |
| "loss": 0.0882, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.9837643060418074, | |
| "grad_norm": 0.3197941184043884, | |
| "learning_rate": 1.6248820319231876e-06, | |
| "loss": 0.0878, | |
| "step": 24025 | |
| }, | |
| { | |
| "epoch": 0.9847879941854514, | |
| "grad_norm": 0.2794630229473114, | |
| "learning_rate": 1.5223010955644004e-06, | |
| "loss": 0.0878, | |
| "step": 24050 | |
| }, | |
| { | |
| "epoch": 0.9858116823290952, | |
| "grad_norm": 0.2822708487510681, | |
| "learning_rate": 1.4197201592056133e-06, | |
| "loss": 0.0853, | |
| "step": 24075 | |
| }, | |
| { | |
| "epoch": 0.9868353704727392, | |
| "grad_norm": 0.2595159709453583, | |
| "learning_rate": 1.3171392228468263e-06, | |
| "loss": 0.0901, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.9878590586163831, | |
| "grad_norm": 0.27910885214805603, | |
| "learning_rate": 1.214558286488039e-06, | |
| "loss": 0.0837, | |
| "step": 24125 | |
| }, | |
| { | |
| "epoch": 0.9888827467600271, | |
| "grad_norm": 0.2924407720565796, | |
| "learning_rate": 1.111977350129252e-06, | |
| "loss": 0.086, | |
| "step": 24150 | |
| }, | |
| { | |
| "epoch": 0.9899064349036709, | |
| "grad_norm": 0.2329237014055252, | |
| "learning_rate": 1.0093964137704648e-06, | |
| "loss": 0.094, | |
| "step": 24175 | |
| }, | |
| { | |
| "epoch": 0.9909301230473149, | |
| "grad_norm": 0.2659105062484741, | |
| "learning_rate": 9.068154774116778e-07, | |
| "loss": 0.086, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.9919538111909588, | |
| "grad_norm": 0.2556705176830292, | |
| "learning_rate": 8.042345410528908e-07, | |
| "loss": 0.0906, | |
| "step": 24225 | |
| }, | |
| { | |
| "epoch": 0.9929774993346027, | |
| "grad_norm": 0.2836422324180603, | |
| "learning_rate": 7.016536046941037e-07, | |
| "loss": 0.0934, | |
| "step": 24250 | |
| }, | |
| { | |
| "epoch": 0.9940011874782466, | |
| "grad_norm": 0.26003921031951904, | |
| "learning_rate": 5.990726683353166e-07, | |
| "loss": 0.089, | |
| "step": 24275 | |
| }, | |
| { | |
| "epoch": 0.9950248756218906, | |
| "grad_norm": 0.2415328323841095, | |
| "learning_rate": 4.964917319765296e-07, | |
| "loss": 0.0882, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.9960485637655345, | |
| "grad_norm": 0.48855510354042053, | |
| "learning_rate": 3.9391079561774244e-07, | |
| "loss": 0.0895, | |
| "step": 24325 | |
| }, | |
| { | |
| "epoch": 0.9970722519091784, | |
| "grad_norm": 0.2958788573741913, | |
| "learning_rate": 2.9132985925895536e-07, | |
| "loss": 0.084, | |
| "step": 24350 | |
| }, | |
| { | |
| "epoch": 0.9980959400528223, | |
| "grad_norm": 0.24037177860736847, | |
| "learning_rate": 1.8874892290016825e-07, | |
| "loss": 0.0894, | |
| "step": 24375 | |
| }, | |
| { | |
| "epoch": 0.9991196281964663, | |
| "grad_norm": 0.32911139726638794, | |
| "learning_rate": 8.616798654138116e-08, | |
| "loss": 0.0882, | |
| "step": 24400 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 24421, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 250, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.659903259460792e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |