{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.8892706320234773, "eval_steps": 500, "global_step": 3300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005726147018824708, "grad_norm": 7.761023998260498, "learning_rate": 0.0, "loss": 6.0592, "step": 1 }, { "epoch": 0.0011452294037649416, "grad_norm": 7.8541951179504395, "learning_rate": 5.714285714285715e-07, "loss": 6.0156, "step": 2 }, { "epoch": 0.0022904588075298832, "grad_norm": 7.347611904144287, "learning_rate": 1.7142857142857145e-06, "loss": 6.0103, "step": 4 }, { "epoch": 0.003435688211294825, "grad_norm": 5.382428169250488, "learning_rate": 2.8571428571428573e-06, "loss": 5.9221, "step": 6 }, { "epoch": 0.0045809176150597665, "grad_norm": 5.063406467437744, "learning_rate": 4.000000000000001e-06, "loss": 6.0365, "step": 8 }, { "epoch": 0.005726147018824708, "grad_norm": 9.779157638549805, "learning_rate": 5.142857142857143e-06, "loss": 6.0336, "step": 10 }, { "epoch": 0.00687137642258965, "grad_norm": 7.555446147918701, "learning_rate": 6.285714285714287e-06, "loss": 6.0328, "step": 12 }, { "epoch": 0.008016605826354592, "grad_norm": 6.790043354034424, "learning_rate": 7.428571428571429e-06, "loss": 5.7848, "step": 14 }, { "epoch": 0.009161835230119533, "grad_norm": 4.4132208824157715, "learning_rate": 8.571428571428573e-06, "loss": 5.8207, "step": 16 }, { "epoch": 0.010307064633884476, "grad_norm": 4.064995765686035, "learning_rate": 9.714285714285715e-06, "loss": 5.6497, "step": 18 }, { "epoch": 0.011452294037649417, "grad_norm": 3.357184410095215, "learning_rate": 1.0857142857142858e-05, "loss": 5.7758, "step": 20 }, { "epoch": 0.012597523441414358, "grad_norm": 2.742230176925659, "learning_rate": 1.2e-05, "loss": 5.6173, "step": 22 }, { "epoch": 0.0137427528451793, "grad_norm": 2.491459369659424, "learning_rate": 1.3142857142857143e-05, "loss": 5.6681, "step": 24 }, { "epoch": 0.014887982248944241, "grad_norm": 2.7569029331207275, "learning_rate": 1.4285714285714285e-05, "loss": 5.6393, "step": 26 }, { "epoch": 0.016033211652709184, "grad_norm": 2.208378791809082, "learning_rate": 1.5428571428571428e-05, "loss": 5.5768, "step": 28 }, { "epoch": 0.017178441056474127, "grad_norm": 3.2770133018493652, "learning_rate": 1.657142857142857e-05, "loss": 5.484, "step": 30 }, { "epoch": 0.018323670460239066, "grad_norm": 3.177299976348877, "learning_rate": 1.7714285714285713e-05, "loss": 5.528, "step": 32 }, { "epoch": 0.01946889986400401, "grad_norm": 2.1981537342071533, "learning_rate": 1.885714285714286e-05, "loss": 5.6327, "step": 34 }, { "epoch": 0.02061412926776895, "grad_norm": 3.265881061553955, "learning_rate": 2e-05, "loss": 5.6288, "step": 36 }, { "epoch": 0.02175935867153389, "grad_norm": 3.6059298515319824, "learning_rate": 2.1142857142857144e-05, "loss": 5.4789, "step": 38 }, { "epoch": 0.022904588075298833, "grad_norm": 2.4080026149749756, "learning_rate": 2.2285714285714287e-05, "loss": 5.4046, "step": 40 }, { "epoch": 0.024049817479063776, "grad_norm": 2.142902135848999, "learning_rate": 2.342857142857143e-05, "loss": 5.4738, "step": 42 }, { "epoch": 0.025195046882828715, "grad_norm": 2.4021224975585938, "learning_rate": 2.4571428571428572e-05, "loss": 5.4649, "step": 44 }, { "epoch": 0.026340276286593658, "grad_norm": 2.172009229660034, "learning_rate": 2.5714285714285714e-05, "loss": 5.4302, "step": 46 }, { "epoch": 0.0274855056903586, "grad_norm": 2.9737730026245117, "learning_rate": 2.6857142857142857e-05, "loss": 5.3045, "step": 48 }, { "epoch": 0.028630735094123543, "grad_norm": 3.0378615856170654, "learning_rate": 2.8000000000000003e-05, "loss": 5.2185, "step": 50 }, { "epoch": 0.029775964497888482, "grad_norm": 3.4448676109313965, "learning_rate": 2.9142857142857146e-05, "loss": 5.1838, "step": 52 }, { "epoch": 0.030921193901653425, "grad_norm": 2.469245672225952, "learning_rate": 3.0285714285714288e-05, "loss": 5.1637, "step": 54 }, { "epoch": 0.03206642330541837, "grad_norm": 3.58486008644104, "learning_rate": 3.142857142857143e-05, "loss": 5.2063, "step": 56 }, { "epoch": 0.03321165270918331, "grad_norm": 3.0815446376800537, "learning_rate": 3.257142857142857e-05, "loss": 5.2317, "step": 58 }, { "epoch": 0.03435688211294825, "grad_norm": 3.6842119693756104, "learning_rate": 3.3714285714285716e-05, "loss": 5.2695, "step": 60 }, { "epoch": 0.03550211151671319, "grad_norm": 2.9440791606903076, "learning_rate": 3.485714285714286e-05, "loss": 5.2686, "step": 62 }, { "epoch": 0.03664734092047813, "grad_norm": 3.9632568359375, "learning_rate": 3.6e-05, "loss": 5.1262, "step": 64 }, { "epoch": 0.037792570324243074, "grad_norm": 4.045065402984619, "learning_rate": 3.7142857142857143e-05, "loss": 5.1546, "step": 66 }, { "epoch": 0.03893779972800802, "grad_norm": 3.5707085132598877, "learning_rate": 3.8285714285714286e-05, "loss": 5.0036, "step": 68 }, { "epoch": 0.04008302913177296, "grad_norm": 3.014404535293579, "learning_rate": 3.942857142857143e-05, "loss": 5.026, "step": 70 }, { "epoch": 0.0412282585355379, "grad_norm": 2.708796977996826, "learning_rate": 4.057142857142857e-05, "loss": 4.9442, "step": 72 }, { "epoch": 0.04237348793930284, "grad_norm": 2.5384011268615723, "learning_rate": 4.1714285714285714e-05, "loss": 5.0223, "step": 74 }, { "epoch": 0.04351871734306778, "grad_norm": 3.006281852722168, "learning_rate": 4.2857142857142856e-05, "loss": 4.9827, "step": 76 }, { "epoch": 0.044663946746832724, "grad_norm": 2.5772130489349365, "learning_rate": 4.4000000000000006e-05, "loss": 4.9675, "step": 78 }, { "epoch": 0.045809176150597666, "grad_norm": 3.456017255783081, "learning_rate": 4.514285714285714e-05, "loss": 5.0341, "step": 80 }, { "epoch": 0.04695440555436261, "grad_norm": 3.3163113594055176, "learning_rate": 4.628571428571429e-05, "loss": 4.9867, "step": 82 }, { "epoch": 0.04809963495812755, "grad_norm": 3.7568469047546387, "learning_rate": 4.742857142857143e-05, "loss": 4.8652, "step": 84 }, { "epoch": 0.049244864361892494, "grad_norm": 4.19318151473999, "learning_rate": 4.8571428571428576e-05, "loss": 5.0602, "step": 86 }, { "epoch": 0.05039009376565743, "grad_norm": 5.1034064292907715, "learning_rate": 4.971428571428572e-05, "loss": 4.9757, "step": 88 }, { "epoch": 0.05153532316942237, "grad_norm": 4.0827484130859375, "learning_rate": 5.085714285714286e-05, "loss": 4.8486, "step": 90 }, { "epoch": 0.052680552573187316, "grad_norm": 4.6189446449279785, "learning_rate": 5.2000000000000004e-05, "loss": 4.9595, "step": 92 }, { "epoch": 0.05382578197695226, "grad_norm": 3.988513469696045, "learning_rate": 5.314285714285715e-05, "loss": 4.9035, "step": 94 }, { "epoch": 0.0549710113807172, "grad_norm": 3.857276678085327, "learning_rate": 5.428571428571428e-05, "loss": 4.8277, "step": 96 }, { "epoch": 0.056116240784482144, "grad_norm": 3.5372354984283447, "learning_rate": 5.542857142857143e-05, "loss": 4.7718, "step": 98 }, { "epoch": 0.057261470188247086, "grad_norm": 3.3853676319122314, "learning_rate": 5.6571428571428574e-05, "loss": 4.8098, "step": 100 }, { "epoch": 0.05840669959201202, "grad_norm": 2.1142077445983887, "learning_rate": 5.771428571428572e-05, "loss": 4.7975, "step": 102 }, { "epoch": 0.059551928995776965, "grad_norm": 3.2275538444519043, "learning_rate": 5.885714285714285e-05, "loss": 4.8509, "step": 104 }, { "epoch": 0.06069715839954191, "grad_norm": 3.5413126945495605, "learning_rate": 6e-05, "loss": 4.6069, "step": 106 }, { "epoch": 0.06184238780330685, "grad_norm": 2.755648374557495, "learning_rate": 6.114285714285714e-05, "loss": 4.6951, "step": 108 }, { "epoch": 0.06298761720707179, "grad_norm": 2.980039596557617, "learning_rate": 6.22857142857143e-05, "loss": 4.7012, "step": 110 }, { "epoch": 0.06413284661083674, "grad_norm": 4.890020370483398, "learning_rate": 6.342857142857143e-05, "loss": 4.8008, "step": 112 }, { "epoch": 0.06527807601460167, "grad_norm": 4.35846471786499, "learning_rate": 6.457142857142856e-05, "loss": 4.8587, "step": 114 }, { "epoch": 0.06642330541836662, "grad_norm": 3.6171813011169434, "learning_rate": 6.571428571428571e-05, "loss": 4.7473, "step": 116 }, { "epoch": 0.06756853482213156, "grad_norm": 2.4927010536193848, "learning_rate": 6.685714285714286e-05, "loss": 4.7113, "step": 118 }, { "epoch": 0.0687137642258965, "grad_norm": 3.3327009677886963, "learning_rate": 6.800000000000001e-05, "loss": 4.6105, "step": 120 }, { "epoch": 0.06985899362966144, "grad_norm": 3.1123206615448, "learning_rate": 6.914285714285715e-05, "loss": 4.5968, "step": 122 }, { "epoch": 0.07100422303342638, "grad_norm": 2.6985421180725098, "learning_rate": 7.028571428571428e-05, "loss": 4.6323, "step": 124 }, { "epoch": 0.07214945243719133, "grad_norm": 2.058084011077881, "learning_rate": 7.142857142857143e-05, "loss": 4.5721, "step": 126 }, { "epoch": 0.07329468184095626, "grad_norm": 2.144658327102661, "learning_rate": 7.257142857142858e-05, "loss": 4.6125, "step": 128 }, { "epoch": 0.07443991124472121, "grad_norm": 2.477219820022583, "learning_rate": 7.371428571428572e-05, "loss": 4.4727, "step": 130 }, { "epoch": 0.07558514064848615, "grad_norm": 3.8517298698425293, "learning_rate": 7.485714285714285e-05, "loss": 4.5696, "step": 132 }, { "epoch": 0.0767303700522511, "grad_norm": 3.0253565311431885, "learning_rate": 7.6e-05, "loss": 4.4838, "step": 134 }, { "epoch": 0.07787559945601603, "grad_norm": 3.397569179534912, "learning_rate": 7.714285714285715e-05, "loss": 4.6431, "step": 136 }, { "epoch": 0.07902082885978097, "grad_norm": 2.435197114944458, "learning_rate": 7.828571428571429e-05, "loss": 4.4681, "step": 138 }, { "epoch": 0.08016605826354592, "grad_norm": 2.6476476192474365, "learning_rate": 7.942857142857143e-05, "loss": 4.4462, "step": 140 }, { "epoch": 0.08131128766731086, "grad_norm": 2.1929690837860107, "learning_rate": 8.057142857142857e-05, "loss": 4.5136, "step": 142 }, { "epoch": 0.0824565170710758, "grad_norm": 2.4533395767211914, "learning_rate": 8.171428571428572e-05, "loss": 4.5572, "step": 144 }, { "epoch": 0.08360174647484074, "grad_norm": 2.601806879043579, "learning_rate": 8.285714285714287e-05, "loss": 4.4121, "step": 146 }, { "epoch": 0.08474697587860568, "grad_norm": 3.233973741531372, "learning_rate": 8.4e-05, "loss": 4.4599, "step": 148 }, { "epoch": 0.08589220528237063, "grad_norm": 2.6353538036346436, "learning_rate": 8.514285714285714e-05, "loss": 4.4533, "step": 150 }, { "epoch": 0.08703743468613556, "grad_norm": 2.8465511798858643, "learning_rate": 8.62857142857143e-05, "loss": 4.5246, "step": 152 }, { "epoch": 0.08818266408990051, "grad_norm": 2.8642711639404297, "learning_rate": 8.742857142857144e-05, "loss": 4.4659, "step": 154 }, { "epoch": 0.08932789349366545, "grad_norm": 2.793112277984619, "learning_rate": 8.857142857142857e-05, "loss": 4.5107, "step": 156 }, { "epoch": 0.0904731228974304, "grad_norm": 3.43472957611084, "learning_rate": 8.971428571428571e-05, "loss": 4.4079, "step": 158 }, { "epoch": 0.09161835230119533, "grad_norm": 2.9260294437408447, "learning_rate": 9.085714285714286e-05, "loss": 4.4047, "step": 160 }, { "epoch": 0.09276358170496027, "grad_norm": 2.6336724758148193, "learning_rate": 9.200000000000001e-05, "loss": 4.4777, "step": 162 }, { "epoch": 0.09390881110872522, "grad_norm": 2.8348231315612793, "learning_rate": 9.314285714285715e-05, "loss": 4.3445, "step": 164 }, { "epoch": 0.09505404051249015, "grad_norm": 4.271595478057861, "learning_rate": 9.428571428571429e-05, "loss": 4.4234, "step": 166 }, { "epoch": 0.0961992699162551, "grad_norm": 3.4789109230041504, "learning_rate": 9.542857142857143e-05, "loss": 4.2872, "step": 168 }, { "epoch": 0.09734449932002004, "grad_norm": 2.57273530960083, "learning_rate": 9.657142857142858e-05, "loss": 4.4177, "step": 170 }, { "epoch": 0.09848972872378499, "grad_norm": 2.185086250305176, "learning_rate": 9.771428571428572e-05, "loss": 4.3568, "step": 172 }, { "epoch": 0.09963495812754992, "grad_norm": 2.771744966506958, "learning_rate": 9.885714285714286e-05, "loss": 4.3392, "step": 174 }, { "epoch": 0.10078018753131486, "grad_norm": 1.950353741645813, "learning_rate": 0.0001, "loss": 4.1931, "step": 176 }, { "epoch": 0.10192541693507981, "grad_norm": 2.4709694385528564, "learning_rate": 9.999991040472416e-05, "loss": 4.2936, "step": 178 }, { "epoch": 0.10307064633884475, "grad_norm": 2.140997886657715, "learning_rate": 9.999964161921776e-05, "loss": 4.1653, "step": 180 }, { "epoch": 0.1042158757426097, "grad_norm": 2.491321563720703, "learning_rate": 9.999919364444403e-05, "loss": 4.3202, "step": 182 }, { "epoch": 0.10536110514637463, "grad_norm": 2.5410189628601074, "learning_rate": 9.999856648200845e-05, "loss": 4.2657, "step": 184 }, { "epoch": 0.10650633455013958, "grad_norm": 2.1820590496063232, "learning_rate": 9.999776013415866e-05, "loss": 4.2282, "step": 186 }, { "epoch": 0.10765156395390452, "grad_norm": 1.7251808643341064, "learning_rate": 9.999677460378444e-05, "loss": 4.3421, "step": 188 }, { "epoch": 0.10879679335766945, "grad_norm": 2.002145290374756, "learning_rate": 9.999560989441779e-05, "loss": 4.1361, "step": 190 }, { "epoch": 0.1099420227614344, "grad_norm": 1.9663431644439697, "learning_rate": 9.999426601023274e-05, "loss": 4.201, "step": 192 }, { "epoch": 0.11108725216519934, "grad_norm": 2.1406776905059814, "learning_rate": 9.999274295604558e-05, "loss": 4.1086, "step": 194 }, { "epoch": 0.11223248156896429, "grad_norm": 3.3888607025146484, "learning_rate": 9.999104073731458e-05, "loss": 4.2723, "step": 196 }, { "epoch": 0.11337771097272922, "grad_norm": 2.371840715408325, "learning_rate": 9.998915936014024e-05, "loss": 4.1893, "step": 198 }, { "epoch": 0.11452294037649417, "grad_norm": 2.0502302646636963, "learning_rate": 9.998709883126502e-05, "loss": 4.1395, "step": 200 }, { "epoch": 0.11566816978025911, "grad_norm": 1.6674678325653076, "learning_rate": 9.998485915807347e-05, "loss": 4.071, "step": 202 }, { "epoch": 0.11681339918402404, "grad_norm": 1.7829004526138306, "learning_rate": 9.998244034859219e-05, "loss": 4.1107, "step": 204 }, { "epoch": 0.117958628587789, "grad_norm": 1.763493299484253, "learning_rate": 9.997984241148967e-05, "loss": 4.1142, "step": 206 }, { "epoch": 0.11910385799155393, "grad_norm": 2.069258213043213, "learning_rate": 9.997706535607649e-05, "loss": 4.047, "step": 208 }, { "epoch": 0.12024908739531888, "grad_norm": 2.4262139797210693, "learning_rate": 9.997410919230505e-05, "loss": 4.0396, "step": 210 }, { "epoch": 0.12139431679908382, "grad_norm": 1.820494532585144, "learning_rate": 9.997097393076971e-05, "loss": 4.1548, "step": 212 }, { "epoch": 0.12253954620284876, "grad_norm": 2.1332643032073975, "learning_rate": 9.996765958270664e-05, "loss": 4.1384, "step": 214 }, { "epoch": 0.1236847756066137, "grad_norm": 2.1329920291900635, "learning_rate": 9.996416615999384e-05, "loss": 4.0315, "step": 216 }, { "epoch": 0.12483000501037864, "grad_norm": 2.29955792427063, "learning_rate": 9.996049367515108e-05, "loss": 4.0963, "step": 218 }, { "epoch": 0.12597523441414357, "grad_norm": 2.225827693939209, "learning_rate": 9.995664214133983e-05, "loss": 4.1247, "step": 220 }, { "epoch": 0.12712046381790854, "grad_norm": 1.794838786125183, "learning_rate": 9.99526115723633e-05, "loss": 4.0449, "step": 222 }, { "epoch": 0.12826569322167347, "grad_norm": 1.7548491954803467, "learning_rate": 9.994840198266626e-05, "loss": 3.927, "step": 224 }, { "epoch": 0.1294109226254384, "grad_norm": 1.487001895904541, "learning_rate": 9.994401338733508e-05, "loss": 3.9714, "step": 226 }, { "epoch": 0.13055615202920334, "grad_norm": 1.9811242818832397, "learning_rate": 9.993944580209768e-05, "loss": 4.0094, "step": 228 }, { "epoch": 0.13170138143296828, "grad_norm": 1.4257248640060425, "learning_rate": 9.99346992433234e-05, "loss": 4.0213, "step": 230 }, { "epoch": 0.13284661083673324, "grad_norm": 1.545812726020813, "learning_rate": 9.992977372802302e-05, "loss": 4.0076, "step": 232 }, { "epoch": 0.13399184024049818, "grad_norm": 1.8193179368972778, "learning_rate": 9.992466927384865e-05, "loss": 4.0536, "step": 234 }, { "epoch": 0.1351370696442631, "grad_norm": 2.329951763153076, "learning_rate": 9.991938589909369e-05, "loss": 3.9284, "step": 236 }, { "epoch": 0.13628229904802805, "grad_norm": 1.928336501121521, "learning_rate": 9.991392362269276e-05, "loss": 3.9462, "step": 238 }, { "epoch": 0.137427528451793, "grad_norm": 1.4073456525802612, "learning_rate": 9.990828246422164e-05, "loss": 3.9525, "step": 240 }, { "epoch": 0.13857275785555795, "grad_norm": 1.6663973331451416, "learning_rate": 9.990246244389713e-05, "loss": 3.9685, "step": 242 }, { "epoch": 0.13971798725932288, "grad_norm": 1.8091737031936646, "learning_rate": 9.989646358257715e-05, "loss": 3.9284, "step": 244 }, { "epoch": 0.14086321666308782, "grad_norm": 1.5511283874511719, "learning_rate": 9.989028590176044e-05, "loss": 3.9289, "step": 246 }, { "epoch": 0.14200844606685276, "grad_norm": 1.5394625663757324, "learning_rate": 9.988392942358664e-05, "loss": 3.9849, "step": 248 }, { "epoch": 0.14315367547061772, "grad_norm": 1.680882453918457, "learning_rate": 9.98773941708362e-05, "loss": 3.9452, "step": 250 }, { "epoch": 0.14429890487438266, "grad_norm": 1.6341670751571655, "learning_rate": 9.98706801669302e-05, "loss": 3.8317, "step": 252 }, { "epoch": 0.1454441342781476, "grad_norm": 1.9933757781982422, "learning_rate": 9.986378743593036e-05, "loss": 3.9665, "step": 254 }, { "epoch": 0.14658936368191253, "grad_norm": 2.2253994941711426, "learning_rate": 9.985671600253894e-05, "loss": 3.9239, "step": 256 }, { "epoch": 0.14773459308567746, "grad_norm": 2.2543365955352783, "learning_rate": 9.984946589209862e-05, "loss": 3.8639, "step": 258 }, { "epoch": 0.14887982248944243, "grad_norm": 1.8106629848480225, "learning_rate": 9.984203713059241e-05, "loss": 3.9178, "step": 260 }, { "epoch": 0.15002505189320736, "grad_norm": 1.638542652130127, "learning_rate": 9.983442974464362e-05, "loss": 3.9169, "step": 262 }, { "epoch": 0.1511702812969723, "grad_norm": 1.3521384000778198, "learning_rate": 9.982664376151564e-05, "loss": 3.8682, "step": 264 }, { "epoch": 0.15231551070073723, "grad_norm": 1.6458699703216553, "learning_rate": 9.981867920911201e-05, "loss": 3.9566, "step": 266 }, { "epoch": 0.1534607401045022, "grad_norm": 1.7851066589355469, "learning_rate": 9.981053611597615e-05, "loss": 3.9085, "step": 268 }, { "epoch": 0.15460596950826713, "grad_norm": 1.6740517616271973, "learning_rate": 9.980221451129137e-05, "loss": 3.8899, "step": 270 }, { "epoch": 0.15575119891203207, "grad_norm": 1.117129921913147, "learning_rate": 9.979371442488073e-05, "loss": 3.7544, "step": 272 }, { "epoch": 0.156896428315797, "grad_norm": 1.5676058530807495, "learning_rate": 9.978503588720694e-05, "loss": 3.7753, "step": 274 }, { "epoch": 0.15804165771956194, "grad_norm": 1.6609163284301758, "learning_rate": 9.977617892937223e-05, "loss": 3.8463, "step": 276 }, { "epoch": 0.1591868871233269, "grad_norm": 1.7229987382888794, "learning_rate": 9.976714358311828e-05, "loss": 3.8446, "step": 278 }, { "epoch": 0.16033211652709184, "grad_norm": 1.6770962476730347, "learning_rate": 9.975792988082603e-05, "loss": 3.8684, "step": 280 }, { "epoch": 0.16147734593085677, "grad_norm": 1.215281367301941, "learning_rate": 9.974853785551568e-05, "loss": 3.7788, "step": 282 }, { "epoch": 0.1626225753346217, "grad_norm": 1.208257794380188, "learning_rate": 9.973896754084646e-05, "loss": 3.8338, "step": 284 }, { "epoch": 0.16376780473838665, "grad_norm": 1.4068255424499512, "learning_rate": 9.972921897111658e-05, "loss": 3.8583, "step": 286 }, { "epoch": 0.1649130341421516, "grad_norm": 1.4898021221160889, "learning_rate": 9.971929218126306e-05, "loss": 3.8051, "step": 288 }, { "epoch": 0.16605826354591655, "grad_norm": 1.6303211450576782, "learning_rate": 9.970918720686164e-05, "loss": 3.8598, "step": 290 }, { "epoch": 0.16720349294968148, "grad_norm": 1.6599496603012085, "learning_rate": 9.969890408412665e-05, "loss": 3.7214, "step": 292 }, { "epoch": 0.16834872235344642, "grad_norm": 1.1958950757980347, "learning_rate": 9.968844284991086e-05, "loss": 3.7042, "step": 294 }, { "epoch": 0.16949395175721135, "grad_norm": 1.3099420070648193, "learning_rate": 9.967780354170533e-05, "loss": 3.7405, "step": 296 }, { "epoch": 0.17063918116097632, "grad_norm": 1.5054072141647339, "learning_rate": 9.966698619763936e-05, "loss": 3.7827, "step": 298 }, { "epoch": 0.17178441056474125, "grad_norm": 1.444757103919983, "learning_rate": 9.965599085648025e-05, "loss": 3.7361, "step": 300 }, { "epoch": 0.1729296399685062, "grad_norm": 0.9423370361328125, "learning_rate": 9.964481755763322e-05, "loss": 3.7063, "step": 302 }, { "epoch": 0.17407486937227112, "grad_norm": 1.044169306755066, "learning_rate": 9.963346634114128e-05, "loss": 3.7999, "step": 304 }, { "epoch": 0.1752200987760361, "grad_norm": 1.578296184539795, "learning_rate": 9.962193724768503e-05, "loss": 3.7448, "step": 306 }, { "epoch": 0.17636532817980102, "grad_norm": 1.4953491687774658, "learning_rate": 9.961023031858258e-05, "loss": 3.7625, "step": 308 }, { "epoch": 0.17751055758356596, "grad_norm": 1.295817494392395, "learning_rate": 9.959834559578934e-05, "loss": 3.7042, "step": 310 }, { "epoch": 0.1786557869873309, "grad_norm": 1.4001609086990356, "learning_rate": 9.95862831218979e-05, "loss": 3.7272, "step": 312 }, { "epoch": 0.17980101639109583, "grad_norm": 1.8881722688674927, "learning_rate": 9.95740429401379e-05, "loss": 3.6904, "step": 314 }, { "epoch": 0.1809462457948608, "grad_norm": 1.919791340827942, "learning_rate": 9.956162509437584e-05, "loss": 3.7071, "step": 316 }, { "epoch": 0.18209147519862573, "grad_norm": 1.758253574371338, "learning_rate": 9.954902962911494e-05, "loss": 3.7906, "step": 318 }, { "epoch": 0.18323670460239067, "grad_norm": 1.480323314666748, "learning_rate": 9.953625658949494e-05, "loss": 3.7697, "step": 320 }, { "epoch": 0.1843819340061556, "grad_norm": 1.5573948621749878, "learning_rate": 9.952330602129202e-05, "loss": 3.752, "step": 322 }, { "epoch": 0.18552716340992054, "grad_norm": 1.3204878568649292, "learning_rate": 9.951017797091858e-05, "loss": 3.6479, "step": 324 }, { "epoch": 0.1866723928136855, "grad_norm": 1.5514147281646729, "learning_rate": 9.949687248542303e-05, "loss": 3.7199, "step": 326 }, { "epoch": 0.18781762221745044, "grad_norm": 1.2910770177841187, "learning_rate": 9.948338961248977e-05, "loss": 3.7427, "step": 328 }, { "epoch": 0.18896285162121537, "grad_norm": 1.1663178205490112, "learning_rate": 9.946972940043882e-05, "loss": 3.6616, "step": 330 }, { "epoch": 0.1901080810249803, "grad_norm": 1.3439650535583496, "learning_rate": 9.945589189822584e-05, "loss": 3.7385, "step": 332 }, { "epoch": 0.19125331042874527, "grad_norm": 1.1256877183914185, "learning_rate": 9.94418771554418e-05, "loss": 3.6056, "step": 334 }, { "epoch": 0.1923985398325102, "grad_norm": 1.1813896894454956, "learning_rate": 9.942768522231289e-05, "loss": 3.5544, "step": 336 }, { "epoch": 0.19354376923627514, "grad_norm": 1.2541157007217407, "learning_rate": 9.941331614970031e-05, "loss": 3.6401, "step": 338 }, { "epoch": 0.19468899864004008, "grad_norm": 1.237069010734558, "learning_rate": 9.939876998910012e-05, "loss": 3.7564, "step": 340 }, { "epoch": 0.19583422804380501, "grad_norm": 1.1157530546188354, "learning_rate": 9.938404679264301e-05, "loss": 3.6164, "step": 342 }, { "epoch": 0.19697945744756998, "grad_norm": 1.149465560913086, "learning_rate": 9.936914661309412e-05, "loss": 3.6968, "step": 344 }, { "epoch": 0.1981246868513349, "grad_norm": 0.9530683755874634, "learning_rate": 9.93540695038529e-05, "loss": 3.6194, "step": 346 }, { "epoch": 0.19926991625509985, "grad_norm": 1.1686296463012695, "learning_rate": 9.933881551895281e-05, "loss": 3.7604, "step": 348 }, { "epoch": 0.20041514565886479, "grad_norm": 1.2699095010757446, "learning_rate": 9.93233847130613e-05, "loss": 3.6371, "step": 350 }, { "epoch": 0.20156037506262972, "grad_norm": 1.1345208883285522, "learning_rate": 9.930777714147945e-05, "loss": 3.6146, "step": 352 }, { "epoch": 0.20270560446639468, "grad_norm": 1.3319895267486572, "learning_rate": 9.929199286014185e-05, "loss": 3.6443, "step": 354 }, { "epoch": 0.20385083387015962, "grad_norm": 1.6053088903427124, "learning_rate": 9.927603192561637e-05, "loss": 3.6277, "step": 356 }, { "epoch": 0.20499606327392456, "grad_norm": 1.2149386405944824, "learning_rate": 9.925989439510398e-05, "loss": 3.5555, "step": 358 }, { "epoch": 0.2061412926776895, "grad_norm": 1.0859287977218628, "learning_rate": 9.924358032643855e-05, "loss": 3.6253, "step": 360 }, { "epoch": 0.20728652208145446, "grad_norm": 0.9613994359970093, "learning_rate": 9.922708977808663e-05, "loss": 3.5826, "step": 362 }, { "epoch": 0.2084317514852194, "grad_norm": 1.0509222745895386, "learning_rate": 9.921042280914721e-05, "loss": 3.6263, "step": 364 }, { "epoch": 0.20957698088898433, "grad_norm": 1.3777049779891968, "learning_rate": 9.919357947935156e-05, "loss": 3.6187, "step": 366 }, { "epoch": 0.21072221029274926, "grad_norm": 1.3364644050598145, "learning_rate": 9.9176559849063e-05, "loss": 3.5946, "step": 368 }, { "epoch": 0.2118674396965142, "grad_norm": 1.4562104940414429, "learning_rate": 9.915936397927665e-05, "loss": 3.6099, "step": 370 }, { "epoch": 0.21301266910027916, "grad_norm": 1.066383719444275, "learning_rate": 9.91419919316193e-05, "loss": 3.5395, "step": 372 }, { "epoch": 0.2141578985040441, "grad_norm": 1.6498733758926392, "learning_rate": 9.912444376834903e-05, "loss": 3.6083, "step": 374 }, { "epoch": 0.21530312790780903, "grad_norm": 0.9828553795814514, "learning_rate": 9.910671955235518e-05, "loss": 3.5409, "step": 376 }, { "epoch": 0.21644835731157397, "grad_norm": 1.178269624710083, "learning_rate": 9.908881934715798e-05, "loss": 3.6018, "step": 378 }, { "epoch": 0.2175935867153389, "grad_norm": 1.3328818082809448, "learning_rate": 9.907074321690838e-05, "loss": 3.5718, "step": 380 }, { "epoch": 0.21873881611910387, "grad_norm": 1.1077896356582642, "learning_rate": 9.905249122638783e-05, "loss": 3.581, "step": 382 }, { "epoch": 0.2198840455228688, "grad_norm": 1.220638394355774, "learning_rate": 9.903406344100798e-05, "loss": 3.5813, "step": 384 }, { "epoch": 0.22102927492663374, "grad_norm": 1.5574766397476196, "learning_rate": 9.901545992681057e-05, "loss": 3.5785, "step": 386 }, { "epoch": 0.22217450433039868, "grad_norm": 1.013902187347412, "learning_rate": 9.899668075046706e-05, "loss": 3.6156, "step": 388 }, { "epoch": 0.2233197337341636, "grad_norm": 1.197936773300171, "learning_rate": 9.897772597927848e-05, "loss": 3.5428, "step": 390 }, { "epoch": 0.22446496313792857, "grad_norm": 0.9838180541992188, "learning_rate": 9.895859568117512e-05, "loss": 3.534, "step": 392 }, { "epoch": 0.2256101925416935, "grad_norm": 1.0316840410232544, "learning_rate": 9.893928992471639e-05, "loss": 3.5691, "step": 394 }, { "epoch": 0.22675542194545845, "grad_norm": 0.9378739595413208, "learning_rate": 9.891980877909045e-05, "loss": 3.5368, "step": 396 }, { "epoch": 0.22790065134922338, "grad_norm": 1.4947346448898315, "learning_rate": 9.890015231411404e-05, "loss": 3.5709, "step": 398 }, { "epoch": 0.22904588075298835, "grad_norm": 0.9118148684501648, "learning_rate": 9.888032060023225e-05, "loss": 3.527, "step": 400 }, { "epoch": 0.23019111015675328, "grad_norm": 1.2407753467559814, "learning_rate": 9.886031370851816e-05, "loss": 3.5301, "step": 402 }, { "epoch": 0.23133633956051822, "grad_norm": 1.7163093090057373, "learning_rate": 9.88401317106727e-05, "loss": 3.5828, "step": 404 }, { "epoch": 0.23248156896428315, "grad_norm": 1.0757009983062744, "learning_rate": 9.881977467902434e-05, "loss": 3.4831, "step": 406 }, { "epoch": 0.2336267983680481, "grad_norm": 0.9473862648010254, "learning_rate": 9.879924268652885e-05, "loss": 3.5196, "step": 408 }, { "epoch": 0.23477202777181305, "grad_norm": 1.199771761894226, "learning_rate": 9.877853580676897e-05, "loss": 3.574, "step": 410 }, { "epoch": 0.235917257175578, "grad_norm": 0.9006698131561279, "learning_rate": 9.875765411395428e-05, "loss": 3.5348, "step": 412 }, { "epoch": 0.23706248657934292, "grad_norm": 1.1242282390594482, "learning_rate": 9.873659768292081e-05, "loss": 3.5249, "step": 414 }, { "epoch": 0.23820771598310786, "grad_norm": 1.0675747394561768, "learning_rate": 9.871536658913082e-05, "loss": 3.5086, "step": 416 }, { "epoch": 0.2393529453868728, "grad_norm": 0.8544116616249084, "learning_rate": 9.869396090867255e-05, "loss": 3.546, "step": 418 }, { "epoch": 0.24049817479063776, "grad_norm": 1.3136742115020752, "learning_rate": 9.867238071825992e-05, "loss": 3.4937, "step": 420 }, { "epoch": 0.2416434041944027, "grad_norm": 1.3740772008895874, "learning_rate": 9.865062609523223e-05, "loss": 3.4303, "step": 422 }, { "epoch": 0.24278863359816763, "grad_norm": 1.342213749885559, "learning_rate": 9.862869711755397e-05, "loss": 3.4982, "step": 424 }, { "epoch": 0.24393386300193257, "grad_norm": 1.0677942037582397, "learning_rate": 9.860659386381443e-05, "loss": 3.4288, "step": 426 }, { "epoch": 0.24507909240569753, "grad_norm": 0.9615838527679443, "learning_rate": 9.858431641322749e-05, "loss": 3.4787, "step": 428 }, { "epoch": 0.24622432180946247, "grad_norm": 1.0572890043258667, "learning_rate": 9.856186484563134e-05, "loss": 3.5314, "step": 430 }, { "epoch": 0.2473695512132274, "grad_norm": 1.158275842666626, "learning_rate": 9.853923924148815e-05, "loss": 3.5504, "step": 432 }, { "epoch": 0.24851478061699234, "grad_norm": 1.171581745147705, "learning_rate": 9.851643968188383e-05, "loss": 3.5478, "step": 434 }, { "epoch": 0.24966001002075727, "grad_norm": 1.0333714485168457, "learning_rate": 9.849346624852764e-05, "loss": 3.5497, "step": 436 }, { "epoch": 0.2508052394245222, "grad_norm": 0.9459155797958374, "learning_rate": 9.847031902375207e-05, "loss": 3.5074, "step": 438 }, { "epoch": 0.25195046882828714, "grad_norm": 1.0424790382385254, "learning_rate": 9.84469980905124e-05, "loss": 3.4961, "step": 440 }, { "epoch": 0.25309569823205214, "grad_norm": 1.0463571548461914, "learning_rate": 9.842350353238642e-05, "loss": 3.4405, "step": 442 }, { "epoch": 0.25424092763581707, "grad_norm": 1.000319242477417, "learning_rate": 9.839983543357421e-05, "loss": 3.4595, "step": 444 }, { "epoch": 0.255386157039582, "grad_norm": 1.2526150941848755, "learning_rate": 9.837599387889773e-05, "loss": 3.5012, "step": 446 }, { "epoch": 0.25653138644334694, "grad_norm": 1.3148843050003052, "learning_rate": 9.835197895380065e-05, "loss": 3.4767, "step": 448 }, { "epoch": 0.2576766158471119, "grad_norm": 1.3939634561538696, "learning_rate": 9.83277907443479e-05, "loss": 3.3783, "step": 450 }, { "epoch": 0.2588218452508768, "grad_norm": 1.0367929935455322, "learning_rate": 9.830342933722545e-05, "loss": 3.4289, "step": 452 }, { "epoch": 0.25996707465464175, "grad_norm": 0.9439120888710022, "learning_rate": 9.827889481974e-05, "loss": 3.4728, "step": 454 }, { "epoch": 0.2611123040584067, "grad_norm": 1.2146074771881104, "learning_rate": 9.82541872798186e-05, "loss": 3.4257, "step": 456 }, { "epoch": 0.2622575334621716, "grad_norm": 1.0530729293823242, "learning_rate": 9.822930680600841e-05, "loss": 3.4681, "step": 458 }, { "epoch": 0.26340276286593656, "grad_norm": 1.1026678085327148, "learning_rate": 9.820425348747637e-05, "loss": 3.4298, "step": 460 }, { "epoch": 0.26454799226970155, "grad_norm": 1.2520779371261597, "learning_rate": 9.817902741400879e-05, "loss": 3.4191, "step": 462 }, { "epoch": 0.2656932216734665, "grad_norm": 1.1041593551635742, "learning_rate": 9.815362867601121e-05, "loss": 3.466, "step": 464 }, { "epoch": 0.2668384510772314, "grad_norm": 0.881693422794342, "learning_rate": 9.812805736450786e-05, "loss": 3.4929, "step": 466 }, { "epoch": 0.26798368048099636, "grad_norm": 1.3125033378601074, "learning_rate": 9.810231357114152e-05, "loss": 3.4592, "step": 468 }, { "epoch": 0.2691289098847613, "grad_norm": 1.2968268394470215, "learning_rate": 9.807639738817307e-05, "loss": 3.4851, "step": 470 }, { "epoch": 0.2702741392885262, "grad_norm": 0.9855544567108154, "learning_rate": 9.805030890848119e-05, "loss": 3.4487, "step": 472 }, { "epoch": 0.27141936869229116, "grad_norm": 1.3063323497772217, "learning_rate": 9.802404822556209e-05, "loss": 3.4961, "step": 474 }, { "epoch": 0.2725645980960561, "grad_norm": 1.0567957162857056, "learning_rate": 9.79976154335291e-05, "loss": 3.3975, "step": 476 }, { "epoch": 0.27370982749982103, "grad_norm": 0.9473979473114014, "learning_rate": 9.797101062711231e-05, "loss": 3.4573, "step": 478 }, { "epoch": 0.274855056903586, "grad_norm": 1.2931294441223145, "learning_rate": 9.794423390165837e-05, "loss": 3.3732, "step": 480 }, { "epoch": 0.27600028630735096, "grad_norm": 1.233302116394043, "learning_rate": 9.791728535312998e-05, "loss": 3.419, "step": 482 }, { "epoch": 0.2771455157111159, "grad_norm": 0.9638918042182922, "learning_rate": 9.789016507810564e-05, "loss": 3.4119, "step": 484 }, { "epoch": 0.27829074511488083, "grad_norm": 1.105643391609192, "learning_rate": 9.786287317377929e-05, "loss": 3.3909, "step": 486 }, { "epoch": 0.27943597451864577, "grad_norm": 0.9666796922683716, "learning_rate": 9.783540973795998e-05, "loss": 3.4194, "step": 488 }, { "epoch": 0.2805812039224107, "grad_norm": 1.3533586263656616, "learning_rate": 9.780777486907146e-05, "loss": 3.3789, "step": 490 }, { "epoch": 0.28172643332617564, "grad_norm": 1.1253416538238525, "learning_rate": 9.777996866615186e-05, "loss": 3.4385, "step": 492 }, { "epoch": 0.2828716627299406, "grad_norm": 0.7198868989944458, "learning_rate": 9.775199122885339e-05, "loss": 3.4038, "step": 494 }, { "epoch": 0.2840168921337055, "grad_norm": 0.9696770310401917, "learning_rate": 9.772384265744188e-05, "loss": 3.4576, "step": 496 }, { "epoch": 0.28516212153747045, "grad_norm": 1.321269154548645, "learning_rate": 9.76955230527965e-05, "loss": 3.4348, "step": 498 }, { "epoch": 0.28630735094123544, "grad_norm": 1.3119802474975586, "learning_rate": 9.766703251640934e-05, "loss": 3.3848, "step": 500 }, { "epoch": 0.2874525803450004, "grad_norm": 1.0199967622756958, "learning_rate": 9.763837115038513e-05, "loss": 3.4108, "step": 502 }, { "epoch": 0.2885978097487653, "grad_norm": 0.9925194382667542, "learning_rate": 9.760953905744075e-05, "loss": 3.31, "step": 504 }, { "epoch": 0.28974303915253025, "grad_norm": 0.9447107315063477, "learning_rate": 9.758053634090502e-05, "loss": 3.3598, "step": 506 }, { "epoch": 0.2908882685562952, "grad_norm": 1.052873134613037, "learning_rate": 9.755136310471817e-05, "loss": 3.3704, "step": 508 }, { "epoch": 0.2920334979600601, "grad_norm": 1.061514139175415, "learning_rate": 9.752201945343156e-05, "loss": 3.3642, "step": 510 }, { "epoch": 0.29317872736382505, "grad_norm": 0.8627074956893921, "learning_rate": 9.74925054922073e-05, "loss": 3.367, "step": 512 }, { "epoch": 0.29432395676759, "grad_norm": 1.0214530229568481, "learning_rate": 9.746282132681785e-05, "loss": 3.3266, "step": 514 }, { "epoch": 0.2954691861713549, "grad_norm": 1.1223275661468506, "learning_rate": 9.743296706364565e-05, "loss": 3.4194, "step": 516 }, { "epoch": 0.2966144155751199, "grad_norm": 0.9849138259887695, "learning_rate": 9.740294280968273e-05, "loss": 3.3664, "step": 518 }, { "epoch": 0.29775964497888485, "grad_norm": 0.7025099396705627, "learning_rate": 9.737274867253034e-05, "loss": 3.3772, "step": 520 }, { "epoch": 0.2989048743826498, "grad_norm": 0.936536967754364, "learning_rate": 9.734238476039858e-05, "loss": 3.3196, "step": 522 }, { "epoch": 0.3000501037864147, "grad_norm": 1.113277792930603, "learning_rate": 9.731185118210598e-05, "loss": 3.4606, "step": 524 }, { "epoch": 0.30119533319017966, "grad_norm": 1.0153186321258545, "learning_rate": 9.728114804707909e-05, "loss": 3.4079, "step": 526 }, { "epoch": 0.3023405625939446, "grad_norm": 1.1675206422805786, "learning_rate": 9.725027546535215e-05, "loss": 3.4111, "step": 528 }, { "epoch": 0.30348579199770953, "grad_norm": 0.9518959522247314, "learning_rate": 9.721923354756665e-05, "loss": 3.3905, "step": 530 }, { "epoch": 0.30463102140147447, "grad_norm": 0.9693425297737122, "learning_rate": 9.718802240497098e-05, "loss": 3.4364, "step": 532 }, { "epoch": 0.3057762508052394, "grad_norm": 1.1249076128005981, "learning_rate": 9.715664214941997e-05, "loss": 3.3373, "step": 534 }, { "epoch": 0.3069214802090044, "grad_norm": 0.8406875133514404, "learning_rate": 9.712509289337453e-05, "loss": 3.321, "step": 536 }, { "epoch": 0.30806670961276933, "grad_norm": 0.9538395404815674, "learning_rate": 9.709337474990121e-05, "loss": 3.4007, "step": 538 }, { "epoch": 0.30921193901653427, "grad_norm": 0.8003599047660828, "learning_rate": 9.706148783267187e-05, "loss": 3.3798, "step": 540 }, { "epoch": 0.3103571684202992, "grad_norm": 0.8605026602745056, "learning_rate": 9.702943225596316e-05, "loss": 3.2908, "step": 542 }, { "epoch": 0.31150239782406414, "grad_norm": 0.7349815964698792, "learning_rate": 9.699720813465625e-05, "loss": 3.408, "step": 544 }, { "epoch": 0.3126476272278291, "grad_norm": 1.1622780561447144, "learning_rate": 9.696481558423628e-05, "loss": 3.3212, "step": 546 }, { "epoch": 0.313792856631594, "grad_norm": 0.9829496145248413, "learning_rate": 9.693225472079204e-05, "loss": 3.4067, "step": 548 }, { "epoch": 0.31493808603535894, "grad_norm": 1.1378313302993774, "learning_rate": 9.689952566101548e-05, "loss": 3.3556, "step": 550 }, { "epoch": 0.3160833154391239, "grad_norm": 0.9355561137199402, "learning_rate": 9.686662852220142e-05, "loss": 3.3281, "step": 552 }, { "epoch": 0.3172285448428888, "grad_norm": 0.9328277111053467, "learning_rate": 9.683356342224694e-05, "loss": 3.313, "step": 554 }, { "epoch": 0.3183737742466538, "grad_norm": 1.277377724647522, "learning_rate": 9.680033047965114e-05, "loss": 3.3499, "step": 556 }, { "epoch": 0.31951900365041874, "grad_norm": 1.0239235162734985, "learning_rate": 9.67669298135146e-05, "loss": 3.3936, "step": 558 }, { "epoch": 0.3206642330541837, "grad_norm": 0.6908963322639465, "learning_rate": 9.673336154353899e-05, "loss": 3.3584, "step": 560 }, { "epoch": 0.3218094624579486, "grad_norm": 0.8835290670394897, "learning_rate": 9.669962579002664e-05, "loss": 3.3728, "step": 562 }, { "epoch": 0.32295469186171355, "grad_norm": 1.0561710596084595, "learning_rate": 9.666572267388013e-05, "loss": 3.3579, "step": 564 }, { "epoch": 0.3240999212654785, "grad_norm": 0.8400120735168457, "learning_rate": 9.663165231660181e-05, "loss": 3.3224, "step": 566 }, { "epoch": 0.3252451506692434, "grad_norm": 0.8960584998130798, "learning_rate": 9.659741484029341e-05, "loss": 3.3434, "step": 568 }, { "epoch": 0.32639038007300836, "grad_norm": 0.9615944027900696, "learning_rate": 9.656301036765558e-05, "loss": 3.2587, "step": 570 }, { "epoch": 0.3275356094767733, "grad_norm": 0.983391523361206, "learning_rate": 9.652843902198743e-05, "loss": 3.2396, "step": 572 }, { "epoch": 0.3286808388805383, "grad_norm": 0.7758197784423828, "learning_rate": 9.649370092718615e-05, "loss": 3.2948, "step": 574 }, { "epoch": 0.3298260682843032, "grad_norm": 0.9714862704277039, "learning_rate": 9.64587962077465e-05, "loss": 3.3381, "step": 576 }, { "epoch": 0.33097129768806816, "grad_norm": 0.8628116846084595, "learning_rate": 9.64237249887604e-05, "loss": 3.294, "step": 578 }, { "epoch": 0.3321165270918331, "grad_norm": 0.9794777035713196, "learning_rate": 9.638848739591646e-05, "loss": 3.3119, "step": 580 }, { "epoch": 0.333261756495598, "grad_norm": 0.8179820775985718, "learning_rate": 9.635308355549957e-05, "loss": 3.3009, "step": 582 }, { "epoch": 0.33440698589936296, "grad_norm": 0.8732323050498962, "learning_rate": 9.63175135943904e-05, "loss": 3.3207, "step": 584 }, { "epoch": 0.3355522153031279, "grad_norm": 1.0355788469314575, "learning_rate": 9.628177764006497e-05, "loss": 3.2889, "step": 586 }, { "epoch": 0.33669744470689283, "grad_norm": 0.8974720239639282, "learning_rate": 9.624587582059417e-05, "loss": 3.3089, "step": 588 }, { "epoch": 0.33784267411065777, "grad_norm": 0.7800531387329102, "learning_rate": 9.620980826464335e-05, "loss": 3.2999, "step": 590 }, { "epoch": 0.3389879035144227, "grad_norm": 0.7294676899909973, "learning_rate": 9.617357510147182e-05, "loss": 3.3634, "step": 592 }, { "epoch": 0.3401331329181877, "grad_norm": 0.7799131274223328, "learning_rate": 9.613717646093239e-05, "loss": 3.308, "step": 594 }, { "epoch": 0.34127836232195263, "grad_norm": 0.9899328947067261, "learning_rate": 9.610061247347091e-05, "loss": 3.3191, "step": 596 }, { "epoch": 0.34242359172571757, "grad_norm": 1.0520347356796265, "learning_rate": 9.606388327012579e-05, "loss": 3.389, "step": 598 }, { "epoch": 0.3435688211294825, "grad_norm": 0.9768466353416443, "learning_rate": 9.602698898252756e-05, "loss": 3.2905, "step": 600 }, { "epoch": 0.34471405053324744, "grad_norm": 0.9359555244445801, "learning_rate": 9.598992974289837e-05, "loss": 3.3022, "step": 602 }, { "epoch": 0.3458592799370124, "grad_norm": 0.7487738728523254, "learning_rate": 9.595270568405156e-05, "loss": 3.2234, "step": 604 }, { "epoch": 0.3470045093407773, "grad_norm": 0.8295655846595764, "learning_rate": 9.591531693939109e-05, "loss": 3.3506, "step": 606 }, { "epoch": 0.34814973874454225, "grad_norm": 0.9020605683326721, "learning_rate": 9.587776364291117e-05, "loss": 3.3026, "step": 608 }, { "epoch": 0.3492949681483072, "grad_norm": 0.7868961095809937, "learning_rate": 9.58400459291957e-05, "loss": 3.2393, "step": 610 }, { "epoch": 0.3504401975520722, "grad_norm": 0.9779835939407349, "learning_rate": 9.580216393341785e-05, "loss": 3.3254, "step": 612 }, { "epoch": 0.3515854269558371, "grad_norm": 0.8962246179580688, "learning_rate": 9.576411779133956e-05, "loss": 3.2486, "step": 614 }, { "epoch": 0.35273065635960205, "grad_norm": 0.9166551828384399, "learning_rate": 9.572590763931097e-05, "loss": 3.2193, "step": 616 }, { "epoch": 0.353875885763367, "grad_norm": 0.7779364585876465, "learning_rate": 9.568753361427009e-05, "loss": 3.2469, "step": 618 }, { "epoch": 0.3550211151671319, "grad_norm": 0.750092089176178, "learning_rate": 9.564899585374214e-05, "loss": 3.2532, "step": 620 }, { "epoch": 0.35616634457089685, "grad_norm": 1.0269392728805542, "learning_rate": 9.561029449583919e-05, "loss": 3.3331, "step": 622 }, { "epoch": 0.3573115739746618, "grad_norm": 0.7937965989112854, "learning_rate": 9.557142967925956e-05, "loss": 3.314, "step": 624 }, { "epoch": 0.3584568033784267, "grad_norm": 1.1338940858840942, "learning_rate": 9.553240154328744e-05, "loss": 3.3375, "step": 626 }, { "epoch": 0.35960203278219166, "grad_norm": 0.7937076091766357, "learning_rate": 9.549321022779229e-05, "loss": 3.2691, "step": 628 }, { "epoch": 0.36074726218595665, "grad_norm": 0.8552340865135193, "learning_rate": 9.545385587322839e-05, "loss": 3.3107, "step": 630 }, { "epoch": 0.3618924915897216, "grad_norm": 1.0279617309570312, "learning_rate": 9.541433862063429e-05, "loss": 3.2552, "step": 632 }, { "epoch": 0.3630377209934865, "grad_norm": 0.9652466177940369, "learning_rate": 9.537465861163237e-05, "loss": 3.242, "step": 634 }, { "epoch": 0.36418295039725146, "grad_norm": 0.9129723310470581, "learning_rate": 9.533481598842827e-05, "loss": 3.3131, "step": 636 }, { "epoch": 0.3653281798010164, "grad_norm": 0.9316424131393433, "learning_rate": 9.529481089381042e-05, "loss": 3.3288, "step": 638 }, { "epoch": 0.36647340920478133, "grad_norm": 0.7987300753593445, "learning_rate": 9.525464347114953e-05, "loss": 3.2832, "step": 640 }, { "epoch": 0.36761863860854627, "grad_norm": 0.7103368043899536, "learning_rate": 9.521431386439807e-05, "loss": 3.2339, "step": 642 }, { "epoch": 0.3687638680123112, "grad_norm": 0.7420955896377563, "learning_rate": 9.517382221808969e-05, "loss": 3.1662, "step": 644 }, { "epoch": 0.36990909741607614, "grad_norm": 0.8201749324798584, "learning_rate": 9.513316867733883e-05, "loss": 3.2837, "step": 646 }, { "epoch": 0.3710543268198411, "grad_norm": 0.8581364154815674, "learning_rate": 9.509235338784009e-05, "loss": 3.2949, "step": 648 }, { "epoch": 0.37219955622360607, "grad_norm": 0.956118643283844, "learning_rate": 9.505137649586775e-05, "loss": 3.316, "step": 650 }, { "epoch": 0.373344785627371, "grad_norm": 0.708759069442749, "learning_rate": 9.501023814827524e-05, "loss": 3.1951, "step": 652 }, { "epoch": 0.37449001503113594, "grad_norm": 0.8143038153648376, "learning_rate": 9.496893849249464e-05, "loss": 3.2738, "step": 654 }, { "epoch": 0.3756352444349009, "grad_norm": 0.6578754782676697, "learning_rate": 9.492747767653611e-05, "loss": 3.2809, "step": 656 }, { "epoch": 0.3767804738386658, "grad_norm": 0.8550508618354797, "learning_rate": 9.488585584898738e-05, "loss": 3.2668, "step": 658 }, { "epoch": 0.37792570324243074, "grad_norm": 0.795080304145813, "learning_rate": 9.48440731590132e-05, "loss": 3.28, "step": 660 }, { "epoch": 0.3790709326461957, "grad_norm": 0.9933105707168579, "learning_rate": 9.480212975635486e-05, "loss": 3.3104, "step": 662 }, { "epoch": 0.3802161620499606, "grad_norm": 1.224338412284851, "learning_rate": 9.476002579132957e-05, "loss": 3.29, "step": 664 }, { "epoch": 0.38136139145372555, "grad_norm": 0.8564585447311401, "learning_rate": 9.471776141483e-05, "loss": 3.2, "step": 666 }, { "epoch": 0.38250662085749054, "grad_norm": 1.160684585571289, "learning_rate": 9.467533677832365e-05, "loss": 3.2226, "step": 668 }, { "epoch": 0.3836518502612555, "grad_norm": 0.8671857714653015, "learning_rate": 9.463275203385244e-05, "loss": 3.2453, "step": 670 }, { "epoch": 0.3847970796650204, "grad_norm": 1.0225045680999756, "learning_rate": 9.459000733403205e-05, "loss": 3.2283, "step": 672 }, { "epoch": 0.38594230906878535, "grad_norm": 0.8350477814674377, "learning_rate": 9.454710283205139e-05, "loss": 3.2584, "step": 674 }, { "epoch": 0.3870875384725503, "grad_norm": 0.8098021745681763, "learning_rate": 9.450403868167208e-05, "loss": 3.2836, "step": 676 }, { "epoch": 0.3882327678763152, "grad_norm": 0.8174638748168945, "learning_rate": 9.446081503722792e-05, "loss": 3.1896, "step": 678 }, { "epoch": 0.38937799728008016, "grad_norm": 0.6904940009117126, "learning_rate": 9.441743205362426e-05, "loss": 3.2464, "step": 680 }, { "epoch": 0.3905232266838451, "grad_norm": 0.692864716053009, "learning_rate": 9.437388988633752e-05, "loss": 3.2277, "step": 682 }, { "epoch": 0.39166845608761003, "grad_norm": 0.7014842629432678, "learning_rate": 9.433018869141464e-05, "loss": 3.2372, "step": 684 }, { "epoch": 0.39281368549137496, "grad_norm": 0.6166806817054749, "learning_rate": 9.428632862547237e-05, "loss": 3.2501, "step": 686 }, { "epoch": 0.39395891489513996, "grad_norm": 0.7060846090316772, "learning_rate": 9.424230984569696e-05, "loss": 3.2881, "step": 688 }, { "epoch": 0.3951041442989049, "grad_norm": 0.7771391272544861, "learning_rate": 9.419813250984337e-05, "loss": 3.2149, "step": 690 }, { "epoch": 0.3962493737026698, "grad_norm": 0.6290923953056335, "learning_rate": 9.415379677623485e-05, "loss": 3.1555, "step": 692 }, { "epoch": 0.39739460310643476, "grad_norm": 0.7270971536636353, "learning_rate": 9.410930280376225e-05, "loss": 3.2554, "step": 694 }, { "epoch": 0.3985398325101997, "grad_norm": 0.681962788105011, "learning_rate": 9.40646507518836e-05, "loss": 3.1671, "step": 696 }, { "epoch": 0.39968506191396463, "grad_norm": 0.5727997422218323, "learning_rate": 9.40198407806234e-05, "loss": 3.237, "step": 698 }, { "epoch": 0.40083029131772957, "grad_norm": 0.7687988877296448, "learning_rate": 9.39748730505721e-05, "loss": 3.2357, "step": 700 }, { "epoch": 0.4019755207214945, "grad_norm": 0.7813317179679871, "learning_rate": 9.392974772288558e-05, "loss": 3.2101, "step": 702 }, { "epoch": 0.40312075012525944, "grad_norm": 0.8766132593154907, "learning_rate": 9.388446495928446e-05, "loss": 3.2852, "step": 704 }, { "epoch": 0.40426597952902443, "grad_norm": 0.7857736349105835, "learning_rate": 9.383902492205363e-05, "loss": 3.2113, "step": 706 }, { "epoch": 0.40541120893278937, "grad_norm": 0.9073331356048584, "learning_rate": 9.379342777404159e-05, "loss": 3.2478, "step": 708 }, { "epoch": 0.4065564383365543, "grad_norm": 0.8033682107925415, "learning_rate": 9.374767367865989e-05, "loss": 3.3159, "step": 710 }, { "epoch": 0.40770166774031924, "grad_norm": 0.7821508646011353, "learning_rate": 9.370176279988256e-05, "loss": 3.2362, "step": 712 }, { "epoch": 0.4088468971440842, "grad_norm": 0.8257923126220703, "learning_rate": 9.365569530224554e-05, "loss": 3.1832, "step": 714 }, { "epoch": 0.4099921265478491, "grad_norm": 0.8349987864494324, "learning_rate": 9.360947135084603e-05, "loss": 3.1995, "step": 716 }, { "epoch": 0.41113735595161405, "grad_norm": 0.8590210676193237, "learning_rate": 9.356309111134191e-05, "loss": 3.2119, "step": 718 }, { "epoch": 0.412282585355379, "grad_norm": 0.8512969017028809, "learning_rate": 9.351655474995122e-05, "loss": 3.2323, "step": 720 }, { "epoch": 0.4134278147591439, "grad_norm": 0.6388457417488098, "learning_rate": 9.346986243345149e-05, "loss": 3.1677, "step": 722 }, { "epoch": 0.4145730441629089, "grad_norm": 0.8811210989952087, "learning_rate": 9.342301432917912e-05, "loss": 3.2307, "step": 724 }, { "epoch": 0.41571827356667385, "grad_norm": 0.9297654628753662, "learning_rate": 9.337601060502891e-05, "loss": 3.1838, "step": 726 }, { "epoch": 0.4168635029704388, "grad_norm": 0.750491201877594, "learning_rate": 9.332885142945329e-05, "loss": 3.23, "step": 728 }, { "epoch": 0.4180087323742037, "grad_norm": 0.8282638192176819, "learning_rate": 9.328153697146186e-05, "loss": 3.1789, "step": 730 }, { "epoch": 0.41915396177796865, "grad_norm": 0.7395208477973938, "learning_rate": 9.323406740062068e-05, "loss": 3.2881, "step": 732 }, { "epoch": 0.4202991911817336, "grad_norm": 0.5959879755973816, "learning_rate": 9.318644288705172e-05, "loss": 3.1879, "step": 734 }, { "epoch": 0.4214444205854985, "grad_norm": 0.6063298583030701, "learning_rate": 9.313866360143227e-05, "loss": 3.273, "step": 736 }, { "epoch": 0.42258964998926346, "grad_norm": 0.6868070960044861, "learning_rate": 9.309072971499422e-05, "loss": 3.2145, "step": 738 }, { "epoch": 0.4237348793930284, "grad_norm": 0.6153081655502319, "learning_rate": 9.304264139952356e-05, "loss": 3.0791, "step": 740 }, { "epoch": 0.42488010879679333, "grad_norm": 0.6345932483673096, "learning_rate": 9.299439882735977e-05, "loss": 3.1991, "step": 742 }, { "epoch": 0.4260253382005583, "grad_norm": 0.7605310082435608, "learning_rate": 9.294600217139506e-05, "loss": 3.1272, "step": 744 }, { "epoch": 0.42717056760432326, "grad_norm": 0.6695173382759094, "learning_rate": 9.289745160507395e-05, "loss": 3.1482, "step": 746 }, { "epoch": 0.4283157970080882, "grad_norm": 0.8121134638786316, "learning_rate": 9.284874730239244e-05, "loss": 3.2122, "step": 748 }, { "epoch": 0.42946102641185313, "grad_norm": 0.8771198391914368, "learning_rate": 9.279988943789759e-05, "loss": 3.1768, "step": 750 }, { "epoch": 0.43060625581561807, "grad_norm": 0.7993550300598145, "learning_rate": 9.275087818668675e-05, "loss": 3.1944, "step": 752 }, { "epoch": 0.431751485219383, "grad_norm": 0.6639721393585205, "learning_rate": 9.270171372440697e-05, "loss": 3.1418, "step": 754 }, { "epoch": 0.43289671462314794, "grad_norm": 0.7494943737983704, "learning_rate": 9.265239622725438e-05, "loss": 3.1956, "step": 756 }, { "epoch": 0.4340419440269129, "grad_norm": 0.7307000160217285, "learning_rate": 9.26029258719736e-05, "loss": 3.133, "step": 758 }, { "epoch": 0.4351871734306778, "grad_norm": 0.7357375621795654, "learning_rate": 9.255330283585701e-05, "loss": 3.1898, "step": 760 }, { "epoch": 0.4363324028344428, "grad_norm": 0.6649693250656128, "learning_rate": 9.250352729674422e-05, "loss": 3.2147, "step": 762 }, { "epoch": 0.43747763223820774, "grad_norm": 0.6873495578765869, "learning_rate": 9.245359943302133e-05, "loss": 3.2341, "step": 764 }, { "epoch": 0.43862286164197267, "grad_norm": 0.7320956587791443, "learning_rate": 9.240351942362038e-05, "loss": 3.1241, "step": 766 }, { "epoch": 0.4397680910457376, "grad_norm": 0.6137463450431824, "learning_rate": 9.235328744801868e-05, "loss": 3.1529, "step": 768 }, { "epoch": 0.44091332044950254, "grad_norm": 0.8658304214477539, "learning_rate": 9.230290368623809e-05, "loss": 3.2168, "step": 770 }, { "epoch": 0.4420585498532675, "grad_norm": 0.7436694502830505, "learning_rate": 9.225236831884454e-05, "loss": 3.1798, "step": 772 }, { "epoch": 0.4432037792570324, "grad_norm": 0.9040384888648987, "learning_rate": 9.220168152694722e-05, "loss": 3.2241, "step": 774 }, { "epoch": 0.44434900866079735, "grad_norm": 0.7236924171447754, "learning_rate": 9.215084349219801e-05, "loss": 3.183, "step": 776 }, { "epoch": 0.4454942380645623, "grad_norm": 0.8633347153663635, "learning_rate": 9.209985439679081e-05, "loss": 3.1776, "step": 778 }, { "epoch": 0.4466394674683272, "grad_norm": 0.730910062789917, "learning_rate": 9.204871442346091e-05, "loss": 3.1633, "step": 780 }, { "epoch": 0.4477846968720922, "grad_norm": 0.809923529624939, "learning_rate": 9.199742375548432e-05, "loss": 3.1736, "step": 782 }, { "epoch": 0.44892992627585715, "grad_norm": 0.7229586839675903, "learning_rate": 9.194598257667711e-05, "loss": 3.1813, "step": 784 }, { "epoch": 0.4500751556796221, "grad_norm": 0.6999960541725159, "learning_rate": 9.189439107139472e-05, "loss": 3.1125, "step": 786 }, { "epoch": 0.451220385083387, "grad_norm": 0.7234693169593811, "learning_rate": 9.184264942453138e-05, "loss": 3.137, "step": 788 }, { "epoch": 0.45236561448715196, "grad_norm": 0.7283908724784851, "learning_rate": 9.179075782151936e-05, "loss": 3.1672, "step": 790 }, { "epoch": 0.4535108438909169, "grad_norm": 0.793543815612793, "learning_rate": 9.173871644832834e-05, "loss": 3.1925, "step": 792 }, { "epoch": 0.45465607329468183, "grad_norm": 0.7263696789741516, "learning_rate": 9.168652549146481e-05, "loss": 3.1609, "step": 794 }, { "epoch": 0.45580130269844676, "grad_norm": 0.7698031663894653, "learning_rate": 9.163418513797126e-05, "loss": 3.2547, "step": 796 }, { "epoch": 0.4569465321022117, "grad_norm": 0.908698320388794, "learning_rate": 9.158169557542566e-05, "loss": 3.2165, "step": 798 }, { "epoch": 0.4580917615059767, "grad_norm": 0.9588857293128967, "learning_rate": 9.152905699194065e-05, "loss": 3.1743, "step": 800 }, { "epoch": 0.4592369909097416, "grad_norm": 0.7442302107810974, "learning_rate": 9.1476269576163e-05, "loss": 3.1088, "step": 802 }, { "epoch": 0.46038222031350656, "grad_norm": 0.7421006560325623, "learning_rate": 9.14233335172728e-05, "loss": 3.1497, "step": 804 }, { "epoch": 0.4615274497172715, "grad_norm": 0.8878415822982788, "learning_rate": 9.13702490049829e-05, "loss": 3.1924, "step": 806 }, { "epoch": 0.46267267912103643, "grad_norm": 0.6878317594528198, "learning_rate": 9.131701622953816e-05, "loss": 3.1366, "step": 808 }, { "epoch": 0.46381790852480137, "grad_norm": 0.7945599555969238, "learning_rate": 9.126363538171478e-05, "loss": 3.1926, "step": 810 }, { "epoch": 0.4649631379285663, "grad_norm": 0.7997886538505554, "learning_rate": 9.121010665281964e-05, "loss": 3.1521, "step": 812 }, { "epoch": 0.46610836733233124, "grad_norm": 0.715614378452301, "learning_rate": 9.115643023468958e-05, "loss": 3.1904, "step": 814 }, { "epoch": 0.4672535967360962, "grad_norm": 0.7846017479896545, "learning_rate": 9.110260631969077e-05, "loss": 3.1338, "step": 816 }, { "epoch": 0.46839882613986117, "grad_norm": 0.6939677596092224, "learning_rate": 9.10486351007179e-05, "loss": 3.1635, "step": 818 }, { "epoch": 0.4695440555436261, "grad_norm": 0.7764283418655396, "learning_rate": 9.099451677119366e-05, "loss": 3.1922, "step": 820 }, { "epoch": 0.47068928494739104, "grad_norm": 0.753666877746582, "learning_rate": 9.094025152506788e-05, "loss": 3.0827, "step": 822 }, { "epoch": 0.471834514351156, "grad_norm": 0.6793937683105469, "learning_rate": 9.088583955681699e-05, "loss": 3.1235, "step": 824 }, { "epoch": 0.4729797437549209, "grad_norm": 0.645055890083313, "learning_rate": 9.08312810614432e-05, "loss": 3.1758, "step": 826 }, { "epoch": 0.47412497315868585, "grad_norm": 0.7241025567054749, "learning_rate": 9.077657623447379e-05, "loss": 3.1636, "step": 828 }, { "epoch": 0.4752702025624508, "grad_norm": 0.762117862701416, "learning_rate": 9.07217252719606e-05, "loss": 3.1423, "step": 830 }, { "epoch": 0.4764154319662157, "grad_norm": 0.7575943470001221, "learning_rate": 9.066672837047907e-05, "loss": 3.1304, "step": 832 }, { "epoch": 0.47756066136998065, "grad_norm": 0.8326764106750488, "learning_rate": 9.061158572712769e-05, "loss": 3.1807, "step": 834 }, { "epoch": 0.4787058907737456, "grad_norm": 0.7815741300582886, "learning_rate": 9.055629753952731e-05, "loss": 3.2113, "step": 836 }, { "epoch": 0.4798511201775106, "grad_norm": 0.7716583609580994, "learning_rate": 9.050086400582033e-05, "loss": 3.1791, "step": 838 }, { "epoch": 0.4809963495812755, "grad_norm": 0.6160004734992981, "learning_rate": 9.044528532467006e-05, "loss": 3.1696, "step": 840 }, { "epoch": 0.48214157898504045, "grad_norm": 0.8025004267692566, "learning_rate": 9.038956169525998e-05, "loss": 3.2002, "step": 842 }, { "epoch": 0.4832868083888054, "grad_norm": 0.733741819858551, "learning_rate": 9.033369331729307e-05, "loss": 3.1661, "step": 844 }, { "epoch": 0.4844320377925703, "grad_norm": 0.7210118770599365, "learning_rate": 9.027768039099103e-05, "loss": 3.1492, "step": 846 }, { "epoch": 0.48557726719633526, "grad_norm": 0.6915583610534668, "learning_rate": 9.02215231170936e-05, "loss": 3.1892, "step": 848 }, { "epoch": 0.4867224966001002, "grad_norm": 0.6812649965286255, "learning_rate": 9.016522169685783e-05, "loss": 3.1404, "step": 850 }, { "epoch": 0.48786772600386513, "grad_norm": 0.7272056341171265, "learning_rate": 9.010877633205738e-05, "loss": 3.1935, "step": 852 }, { "epoch": 0.48901295540763007, "grad_norm": 0.7162798643112183, "learning_rate": 9.005218722498177e-05, "loss": 3.1949, "step": 854 }, { "epoch": 0.49015818481139506, "grad_norm": 0.6110600829124451, "learning_rate": 8.999545457843568e-05, "loss": 3.1217, "step": 856 }, { "epoch": 0.49130341421516, "grad_norm": 0.657370924949646, "learning_rate": 8.993857859573818e-05, "loss": 3.1381, "step": 858 }, { "epoch": 0.49244864361892493, "grad_norm": 0.8181600570678711, "learning_rate": 8.988155948072203e-05, "loss": 3.1527, "step": 860 }, { "epoch": 0.49359387302268987, "grad_norm": 0.586644172668457, "learning_rate": 8.9824397437733e-05, "loss": 3.1328, "step": 862 }, { "epoch": 0.4947391024264548, "grad_norm": 0.8710150718688965, "learning_rate": 8.976709267162903e-05, "loss": 3.1509, "step": 864 }, { "epoch": 0.49588433183021974, "grad_norm": 0.7185545563697815, "learning_rate": 8.970964538777957e-05, "loss": 3.0628, "step": 866 }, { "epoch": 0.4970295612339847, "grad_norm": 0.7242484092712402, "learning_rate": 8.965205579206483e-05, "loss": 3.0603, "step": 868 }, { "epoch": 0.4981747906377496, "grad_norm": 0.7996972799301147, "learning_rate": 8.959432409087504e-05, "loss": 3.2346, "step": 870 }, { "epoch": 0.49932002004151455, "grad_norm": 0.6038782000541687, "learning_rate": 8.953645049110971e-05, "loss": 3.0751, "step": 872 }, { "epoch": 0.5004652494452795, "grad_norm": 0.7712786197662354, "learning_rate": 8.94784352001769e-05, "loss": 3.1086, "step": 874 }, { "epoch": 0.5016104788490444, "grad_norm": 0.6952617168426514, "learning_rate": 8.94202784259924e-05, "loss": 3.13, "step": 876 }, { "epoch": 0.5027557082528094, "grad_norm": 0.7420851588249207, "learning_rate": 8.936198037697916e-05, "loss": 3.1094, "step": 878 }, { "epoch": 0.5039009376565743, "grad_norm": 0.6883806586265564, "learning_rate": 8.930354126206634e-05, "loss": 3.0722, "step": 880 }, { "epoch": 0.5050461670603392, "grad_norm": 0.7546491026878357, "learning_rate": 8.92449612906887e-05, "loss": 3.1571, "step": 882 }, { "epoch": 0.5061913964641043, "grad_norm": 0.7471094727516174, "learning_rate": 8.918624067278576e-05, "loss": 3.1842, "step": 884 }, { "epoch": 0.5073366258678692, "grad_norm": 0.8344042897224426, "learning_rate": 8.912737961880116e-05, "loss": 3.1709, "step": 886 }, { "epoch": 0.5084818552716341, "grad_norm": 0.6555135250091553, "learning_rate": 8.906837833968174e-05, "loss": 3.1777, "step": 888 }, { "epoch": 0.5096270846753991, "grad_norm": 0.799281120300293, "learning_rate": 8.900923704687697e-05, "loss": 3.176, "step": 890 }, { "epoch": 0.510772314079164, "grad_norm": 0.8266319632530212, "learning_rate": 8.894995595233809e-05, "loss": 3.1353, "step": 892 }, { "epoch": 0.511917543482929, "grad_norm": 0.7263309955596924, "learning_rate": 8.889053526851729e-05, "loss": 3.0824, "step": 894 }, { "epoch": 0.5130627728866939, "grad_norm": 0.7665941119194031, "learning_rate": 8.88309752083671e-05, "loss": 3.1808, "step": 896 }, { "epoch": 0.5142080022904588, "grad_norm": 0.7014003396034241, "learning_rate": 8.877127598533952e-05, "loss": 3.1158, "step": 898 }, { "epoch": 0.5153532316942238, "grad_norm": 0.6320556998252869, "learning_rate": 8.871143781338529e-05, "loss": 3.1276, "step": 900 }, { "epoch": 0.5164984610979887, "grad_norm": 0.8376429677009583, "learning_rate": 8.865146090695308e-05, "loss": 3.1422, "step": 902 }, { "epoch": 0.5176436905017536, "grad_norm": 0.6639658212661743, "learning_rate": 8.859134548098883e-05, "loss": 3.0622, "step": 904 }, { "epoch": 0.5187889199055186, "grad_norm": 0.6442060470581055, "learning_rate": 8.853109175093486e-05, "loss": 3.1206, "step": 906 }, { "epoch": 0.5199341493092835, "grad_norm": 0.6882277131080627, "learning_rate": 8.847069993272912e-05, "loss": 3.1315, "step": 908 }, { "epoch": 0.5210793787130484, "grad_norm": 0.8141956329345703, "learning_rate": 8.841017024280449e-05, "loss": 3.1498, "step": 910 }, { "epoch": 0.5222246081168134, "grad_norm": 0.6133621335029602, "learning_rate": 8.834950289808796e-05, "loss": 3.0971, "step": 912 }, { "epoch": 0.5233698375205783, "grad_norm": 0.6844592690467834, "learning_rate": 8.828869811599982e-05, "loss": 3.1408, "step": 914 }, { "epoch": 0.5245150669243432, "grad_norm": 0.7407364845275879, "learning_rate": 8.822775611445289e-05, "loss": 3.1356, "step": 916 }, { "epoch": 0.5256602963281082, "grad_norm": 0.7962344884872437, "learning_rate": 8.816667711185183e-05, "loss": 3.037, "step": 918 }, { "epoch": 0.5268055257318731, "grad_norm": 0.6615867018699646, "learning_rate": 8.81054613270922e-05, "loss": 3.119, "step": 920 }, { "epoch": 0.5279507551356382, "grad_norm": 0.6886764168739319, "learning_rate": 8.804410897955986e-05, "loss": 3.1686, "step": 922 }, { "epoch": 0.5290959845394031, "grad_norm": 0.6714747548103333, "learning_rate": 8.798262028913e-05, "loss": 3.0539, "step": 924 }, { "epoch": 0.530241213943168, "grad_norm": 0.630648672580719, "learning_rate": 8.792099547616646e-05, "loss": 3.03, "step": 926 }, { "epoch": 0.531386443346933, "grad_norm": 0.6129744648933411, "learning_rate": 8.785923476152092e-05, "loss": 3.112, "step": 928 }, { "epoch": 0.5325316727506979, "grad_norm": 0.6656561493873596, "learning_rate": 8.779733836653213e-05, "loss": 3.0675, "step": 930 }, { "epoch": 0.5336769021544628, "grad_norm": 0.6855784058570862, "learning_rate": 8.773530651302506e-05, "loss": 3.0567, "step": 932 }, { "epoch": 0.5348221315582278, "grad_norm": 0.6233646869659424, "learning_rate": 8.767313942331016e-05, "loss": 3.1316, "step": 934 }, { "epoch": 0.5359673609619927, "grad_norm": 0.5950207710266113, "learning_rate": 8.761083732018253e-05, "loss": 3.0838, "step": 936 }, { "epoch": 0.5371125903657576, "grad_norm": 0.6548320651054382, "learning_rate": 8.754840042692114e-05, "loss": 3.128, "step": 938 }, { "epoch": 0.5382578197695226, "grad_norm": 0.5914682149887085, "learning_rate": 8.748582896728801e-05, "loss": 3.0995, "step": 940 }, { "epoch": 0.5394030491732875, "grad_norm": 0.8005509376525879, "learning_rate": 8.742312316552741e-05, "loss": 3.1194, "step": 942 }, { "epoch": 0.5405482785770525, "grad_norm": 0.6313744783401489, "learning_rate": 8.736028324636511e-05, "loss": 3.1332, "step": 944 }, { "epoch": 0.5416935079808174, "grad_norm": 0.6614211797714233, "learning_rate": 8.729730943500751e-05, "loss": 3.1808, "step": 946 }, { "epoch": 0.5428387373845823, "grad_norm": 0.6834341883659363, "learning_rate": 8.723420195714083e-05, "loss": 3.146, "step": 948 }, { "epoch": 0.5439839667883473, "grad_norm": 0.6552104353904724, "learning_rate": 8.717096103893034e-05, "loss": 3.1339, "step": 950 }, { "epoch": 0.5451291961921122, "grad_norm": 0.6134440302848816, "learning_rate": 8.710758690701957e-05, "loss": 3.1073, "step": 952 }, { "epoch": 0.5462744255958771, "grad_norm": 0.6617953181266785, "learning_rate": 8.704407978852941e-05, "loss": 3.0803, "step": 954 }, { "epoch": 0.5474196549996421, "grad_norm": 0.6200254559516907, "learning_rate": 8.698043991105738e-05, "loss": 3.0902, "step": 956 }, { "epoch": 0.548564884403407, "grad_norm": 0.766797661781311, "learning_rate": 8.691666750267677e-05, "loss": 3.0533, "step": 958 }, { "epoch": 0.549710113807172, "grad_norm": 0.8062739372253418, "learning_rate": 8.685276279193583e-05, "loss": 3.074, "step": 960 }, { "epoch": 0.550855343210937, "grad_norm": 0.7662980556488037, "learning_rate": 8.678872600785702e-05, "loss": 3.0567, "step": 962 }, { "epoch": 0.5520005726147019, "grad_norm": 0.610495388507843, "learning_rate": 8.672455737993601e-05, "loss": 3.0505, "step": 964 }, { "epoch": 0.5531458020184669, "grad_norm": 0.7125016450881958, "learning_rate": 8.666025713814106e-05, "loss": 3.1392, "step": 966 }, { "epoch": 0.5542910314222318, "grad_norm": 0.6135743260383606, "learning_rate": 8.65958255129121e-05, "loss": 3.0789, "step": 968 }, { "epoch": 0.5554362608259967, "grad_norm": 0.5998417735099792, "learning_rate": 8.653126273515988e-05, "loss": 3.0702, "step": 970 }, { "epoch": 0.5565814902297617, "grad_norm": 0.7088379859924316, "learning_rate": 8.64665690362652e-05, "loss": 3.084, "step": 972 }, { "epoch": 0.5577267196335266, "grad_norm": 0.6542948484420776, "learning_rate": 8.640174464807805e-05, "loss": 3.1164, "step": 974 }, { "epoch": 0.5588719490372915, "grad_norm": 0.5674989819526672, "learning_rate": 8.63367898029168e-05, "loss": 3.1048, "step": 976 }, { "epoch": 0.5600171784410565, "grad_norm": 0.6628077626228333, "learning_rate": 8.627170473356733e-05, "loss": 3.0492, "step": 978 }, { "epoch": 0.5611624078448214, "grad_norm": 0.6939430832862854, "learning_rate": 8.620648967328224e-05, "loss": 3.1041, "step": 980 }, { "epoch": 0.5623076372485863, "grad_norm": 0.7876750230789185, "learning_rate": 8.614114485577996e-05, "loss": 3.1109, "step": 982 }, { "epoch": 0.5634528666523513, "grad_norm": 0.7786777019500732, "learning_rate": 8.607567051524399e-05, "loss": 3.0289, "step": 984 }, { "epoch": 0.5645980960561162, "grad_norm": 0.6663212180137634, "learning_rate": 8.601006688632199e-05, "loss": 3.0807, "step": 986 }, { "epoch": 0.5657433254598812, "grad_norm": 0.7165863513946533, "learning_rate": 8.594433420412496e-05, "loss": 3.0755, "step": 988 }, { "epoch": 0.5668885548636461, "grad_norm": 0.7116391658782959, "learning_rate": 8.587847270422642e-05, "loss": 3.064, "step": 990 }, { "epoch": 0.568033784267411, "grad_norm": 0.7216659188270569, "learning_rate": 8.581248262266155e-05, "loss": 3.0844, "step": 992 }, { "epoch": 0.569179013671176, "grad_norm": 0.600975751876831, "learning_rate": 8.57463641959263e-05, "loss": 2.9771, "step": 994 }, { "epoch": 0.5703242430749409, "grad_norm": 0.6743506789207458, "learning_rate": 8.568011766097666e-05, "loss": 3.1177, "step": 996 }, { "epoch": 0.5714694724787059, "grad_norm": 0.6986669301986694, "learning_rate": 8.561374325522764e-05, "loss": 3.0838, "step": 998 }, { "epoch": 0.5726147018824709, "grad_norm": 0.8114129900932312, "learning_rate": 8.554724121655262e-05, "loss": 3.1444, "step": 1000 }, { "epoch": 0.5737599312862358, "grad_norm": 0.7919934988021851, "learning_rate": 8.548061178328233e-05, "loss": 3.0166, "step": 1002 }, { "epoch": 0.5749051606900007, "grad_norm": 0.6979469656944275, "learning_rate": 8.541385519420403e-05, "loss": 3.0737, "step": 1004 }, { "epoch": 0.5760503900937657, "grad_norm": 0.6499598026275635, "learning_rate": 8.534697168856076e-05, "loss": 3.0649, "step": 1006 }, { "epoch": 0.5771956194975306, "grad_norm": 0.7335128784179688, "learning_rate": 8.527996150605034e-05, "loss": 3.0403, "step": 1008 }, { "epoch": 0.5783408489012956, "grad_norm": 0.7250447869300842, "learning_rate": 8.521282488682463e-05, "loss": 3.0069, "step": 1010 }, { "epoch": 0.5794860783050605, "grad_norm": 0.6807704567909241, "learning_rate": 8.514556207148857e-05, "loss": 3.119, "step": 1012 }, { "epoch": 0.5806313077088254, "grad_norm": 0.7379552721977234, "learning_rate": 8.507817330109936e-05, "loss": 3.0773, "step": 1014 }, { "epoch": 0.5817765371125904, "grad_norm": 0.6113300919532776, "learning_rate": 8.501065881716566e-05, "loss": 3.0768, "step": 1016 }, { "epoch": 0.5829217665163553, "grad_norm": 0.6463739275932312, "learning_rate": 8.494301886164658e-05, "loss": 3.0759, "step": 1018 }, { "epoch": 0.5840669959201202, "grad_norm": 0.6680572032928467, "learning_rate": 8.487525367695098e-05, "loss": 3.032, "step": 1020 }, { "epoch": 0.5852122253238852, "grad_norm": 0.7283656597137451, "learning_rate": 8.480736350593644e-05, "loss": 3.0986, "step": 1022 }, { "epoch": 0.5863574547276501, "grad_norm": 0.6844098567962646, "learning_rate": 8.473934859190853e-05, "loss": 3.0703, "step": 1024 }, { "epoch": 0.587502684131415, "grad_norm": 0.6737761497497559, "learning_rate": 8.467120917861984e-05, "loss": 3.0775, "step": 1026 }, { "epoch": 0.58864791353518, "grad_norm": 0.8135201930999756, "learning_rate": 8.460294551026916e-05, "loss": 3.0802, "step": 1028 }, { "epoch": 0.5897931429389449, "grad_norm": 0.6999467015266418, "learning_rate": 8.453455783150054e-05, "loss": 3.047, "step": 1030 }, { "epoch": 0.5909383723427099, "grad_norm": 0.7999339699745178, "learning_rate": 8.446604638740256e-05, "loss": 3.1247, "step": 1032 }, { "epoch": 0.5920836017464748, "grad_norm": 0.7229709029197693, "learning_rate": 8.439741142350725e-05, "loss": 3.1009, "step": 1034 }, { "epoch": 0.5932288311502398, "grad_norm": 0.7535393238067627, "learning_rate": 8.432865318578935e-05, "loss": 3.0566, "step": 1036 }, { "epoch": 0.5943740605540048, "grad_norm": 0.7364835143089294, "learning_rate": 8.425977192066539e-05, "loss": 3.0751, "step": 1038 }, { "epoch": 0.5955192899577697, "grad_norm": 0.7996159791946411, "learning_rate": 8.419076787499283e-05, "loss": 3.1277, "step": 1040 }, { "epoch": 0.5966645193615346, "grad_norm": 0.6993304491043091, "learning_rate": 8.412164129606911e-05, "loss": 3.0713, "step": 1042 }, { "epoch": 0.5978097487652996, "grad_norm": 0.6996495127677917, "learning_rate": 8.405239243163084e-05, "loss": 3.0595, "step": 1044 }, { "epoch": 0.5989549781690645, "grad_norm": 0.6525830626487732, "learning_rate": 8.398302152985285e-05, "loss": 3.0666, "step": 1046 }, { "epoch": 0.6001002075728294, "grad_norm": 0.6092258095741272, "learning_rate": 8.391352883934733e-05, "loss": 3.0453, "step": 1048 }, { "epoch": 0.6012454369765944, "grad_norm": 0.7509777545928955, "learning_rate": 8.3843914609163e-05, "loss": 3.016, "step": 1050 }, { "epoch": 0.6023906663803593, "grad_norm": 0.6374807953834534, "learning_rate": 8.377417908878406e-05, "loss": 2.9986, "step": 1052 }, { "epoch": 0.6035358957841243, "grad_norm": 0.6541762351989746, "learning_rate": 8.370432252812946e-05, "loss": 3.0299, "step": 1054 }, { "epoch": 0.6046811251878892, "grad_norm": 0.5995933413505554, "learning_rate": 8.363434517755191e-05, "loss": 3.0853, "step": 1056 }, { "epoch": 0.6058263545916541, "grad_norm": 0.7466599941253662, "learning_rate": 8.356424728783702e-05, "loss": 3.1482, "step": 1058 }, { "epoch": 0.6069715839954191, "grad_norm": 0.7076915502548218, "learning_rate": 8.349402911020234e-05, "loss": 3.0681, "step": 1060 }, { "epoch": 0.608116813399184, "grad_norm": 0.6121742725372314, "learning_rate": 8.34236908962966e-05, "loss": 3.0771, "step": 1062 }, { "epoch": 0.6092620428029489, "grad_norm": 0.7337540984153748, "learning_rate": 8.335323289819865e-05, "loss": 3.1927, "step": 1064 }, { "epoch": 0.6104072722067139, "grad_norm": 0.6119634509086609, "learning_rate": 8.328265536841662e-05, "loss": 3.0988, "step": 1066 }, { "epoch": 0.6115525016104788, "grad_norm": 0.6291252970695496, "learning_rate": 8.321195855988706e-05, "loss": 3.0667, "step": 1068 }, { "epoch": 0.6126977310142437, "grad_norm": 0.5802082419395447, "learning_rate": 8.314114272597398e-05, "loss": 3.0118, "step": 1070 }, { "epoch": 0.6138429604180088, "grad_norm": 0.6166985630989075, "learning_rate": 8.307020812046792e-05, "loss": 3.1762, "step": 1072 }, { "epoch": 0.6149881898217737, "grad_norm": 0.5751842260360718, "learning_rate": 8.299915499758514e-05, "loss": 3.0107, "step": 1074 }, { "epoch": 0.6161334192255387, "grad_norm": 0.6163948178291321, "learning_rate": 8.292798361196658e-05, "loss": 3.0617, "step": 1076 }, { "epoch": 0.6172786486293036, "grad_norm": 0.6665089130401611, "learning_rate": 8.285669421867703e-05, "loss": 3.0729, "step": 1078 }, { "epoch": 0.6184238780330685, "grad_norm": 0.630814254283905, "learning_rate": 8.278528707320421e-05, "loss": 2.9811, "step": 1080 }, { "epoch": 0.6195691074368335, "grad_norm": 0.785892903804779, "learning_rate": 8.271376243145786e-05, "loss": 3.0561, "step": 1082 }, { "epoch": 0.6207143368405984, "grad_norm": 0.6047619581222534, "learning_rate": 8.264212054976875e-05, "loss": 3.0595, "step": 1084 }, { "epoch": 0.6218595662443633, "grad_norm": 0.6675294041633606, "learning_rate": 8.257036168488785e-05, "loss": 3.1725, "step": 1086 }, { "epoch": 0.6230047956481283, "grad_norm": 0.6342408657073975, "learning_rate": 8.24984860939854e-05, "loss": 2.9766, "step": 1088 }, { "epoch": 0.6241500250518932, "grad_norm": 0.5901287794113159, "learning_rate": 8.242649403464989e-05, "loss": 3.1021, "step": 1090 }, { "epoch": 0.6252952544556581, "grad_norm": 0.5638805627822876, "learning_rate": 8.23543857648873e-05, "loss": 2.9866, "step": 1092 }, { "epoch": 0.6264404838594231, "grad_norm": 0.5482515692710876, "learning_rate": 8.228216154312001e-05, "loss": 3.0344, "step": 1094 }, { "epoch": 0.627585713263188, "grad_norm": 0.7258690595626831, "learning_rate": 8.2209821628186e-05, "loss": 3.0339, "step": 1096 }, { "epoch": 0.628730942666953, "grad_norm": 0.6626359820365906, "learning_rate": 8.213736627933786e-05, "loss": 3.1191, "step": 1098 }, { "epoch": 0.6298761720707179, "grad_norm": 0.5897409319877625, "learning_rate": 8.206479575624186e-05, "loss": 2.9604, "step": 1100 }, { "epoch": 0.6310214014744828, "grad_norm": 0.661314070224762, "learning_rate": 8.199211031897704e-05, "loss": 3.0568, "step": 1102 }, { "epoch": 0.6321666308782478, "grad_norm": 0.7244003415107727, "learning_rate": 8.191931022803427e-05, "loss": 3.0202, "step": 1104 }, { "epoch": 0.6333118602820127, "grad_norm": 0.6676930785179138, "learning_rate": 8.184639574431532e-05, "loss": 3.0692, "step": 1106 }, { "epoch": 0.6344570896857776, "grad_norm": 0.8161568641662598, "learning_rate": 8.177336712913194e-05, "loss": 3.0835, "step": 1108 }, { "epoch": 0.6356023190895427, "grad_norm": 0.9007164239883423, "learning_rate": 8.170022464420486e-05, "loss": 3.0665, "step": 1110 }, { "epoch": 0.6367475484933076, "grad_norm": 0.7467122673988342, "learning_rate": 8.162696855166294e-05, "loss": 3.1098, "step": 1112 }, { "epoch": 0.6378927778970725, "grad_norm": 0.5917842388153076, "learning_rate": 8.155359911404217e-05, "loss": 3.0047, "step": 1114 }, { "epoch": 0.6390380073008375, "grad_norm": 0.7426056861877441, "learning_rate": 8.148011659428474e-05, "loss": 3.1037, "step": 1116 }, { "epoch": 0.6401832367046024, "grad_norm": 0.8367446660995483, "learning_rate": 8.140652125573813e-05, "loss": 2.9628, "step": 1118 }, { "epoch": 0.6413284661083674, "grad_norm": 0.6199979186058044, "learning_rate": 8.133281336215412e-05, "loss": 3.0239, "step": 1120 }, { "epoch": 0.6424736955121323, "grad_norm": 0.7761691808700562, "learning_rate": 8.125899317768786e-05, "loss": 3.0609, "step": 1122 }, { "epoch": 0.6436189249158972, "grad_norm": 0.6743906140327454, "learning_rate": 8.118506096689698e-05, "loss": 3.0696, "step": 1124 }, { "epoch": 0.6447641543196622, "grad_norm": 0.6743597388267517, "learning_rate": 8.111101699474051e-05, "loss": 3.045, "step": 1126 }, { "epoch": 0.6459093837234271, "grad_norm": 0.6069556474685669, "learning_rate": 8.103686152657808e-05, "loss": 3.0171, "step": 1128 }, { "epoch": 0.647054613127192, "grad_norm": 0.5756711959838867, "learning_rate": 8.096259482816886e-05, "loss": 3.0161, "step": 1130 }, { "epoch": 0.648199842530957, "grad_norm": 0.6239808797836304, "learning_rate": 8.088821716567066e-05, "loss": 3.0887, "step": 1132 }, { "epoch": 0.6493450719347219, "grad_norm": 0.5236758589744568, "learning_rate": 8.081372880563898e-05, "loss": 2.9743, "step": 1134 }, { "epoch": 0.6504903013384868, "grad_norm": 0.6389586925506592, "learning_rate": 8.073913001502605e-05, "loss": 2.9972, "step": 1136 }, { "epoch": 0.6516355307422518, "grad_norm": 0.5799978971481323, "learning_rate": 8.066442106117978e-05, "loss": 3.0043, "step": 1138 }, { "epoch": 0.6527807601460167, "grad_norm": 0.6182774901390076, "learning_rate": 8.058960221184298e-05, "loss": 3.065, "step": 1140 }, { "epoch": 0.6539259895497816, "grad_norm": 0.5762799382209778, "learning_rate": 8.051467373515228e-05, "loss": 3.0374, "step": 1142 }, { "epoch": 0.6550712189535466, "grad_norm": 0.6707761287689209, "learning_rate": 8.043963589963714e-05, "loss": 3.0056, "step": 1144 }, { "epoch": 0.6562164483573115, "grad_norm": 0.6148689985275269, "learning_rate": 8.036448897421903e-05, "loss": 3.0222, "step": 1146 }, { "epoch": 0.6573616777610766, "grad_norm": 0.6503751277923584, "learning_rate": 8.028923322821031e-05, "loss": 3.0186, "step": 1148 }, { "epoch": 0.6585069071648415, "grad_norm": 0.7160323858261108, "learning_rate": 8.021386893131334e-05, "loss": 3.0785, "step": 1150 }, { "epoch": 0.6596521365686064, "grad_norm": 0.5561687350273132, "learning_rate": 8.013839635361953e-05, "loss": 3.0425, "step": 1152 }, { "epoch": 0.6607973659723714, "grad_norm": 0.7166488170623779, "learning_rate": 8.006281576560834e-05, "loss": 2.9722, "step": 1154 }, { "epoch": 0.6619425953761363, "grad_norm": 0.5913854241371155, "learning_rate": 7.99871274381463e-05, "loss": 2.9645, "step": 1156 }, { "epoch": 0.6630878247799012, "grad_norm": 0.5721243619918823, "learning_rate": 7.99113316424861e-05, "loss": 3.0326, "step": 1158 }, { "epoch": 0.6642330541836662, "grad_norm": 0.6886599659919739, "learning_rate": 7.983542865026552e-05, "loss": 2.9934, "step": 1160 }, { "epoch": 0.6653782835874311, "grad_norm": 0.6505165100097656, "learning_rate": 7.975941873350656e-05, "loss": 3.0275, "step": 1162 }, { "epoch": 0.666523512991196, "grad_norm": 0.5963652729988098, "learning_rate": 7.968330216461439e-05, "loss": 3.0581, "step": 1164 }, { "epoch": 0.667668742394961, "grad_norm": 0.7020843029022217, "learning_rate": 7.960707921637642e-05, "loss": 3.0214, "step": 1166 }, { "epoch": 0.6688139717987259, "grad_norm": 0.5729818344116211, "learning_rate": 7.953075016196128e-05, "loss": 3.0928, "step": 1168 }, { "epoch": 0.6699592012024909, "grad_norm": 0.6135843992233276, "learning_rate": 7.945431527491788e-05, "loss": 3.0281, "step": 1170 }, { "epoch": 0.6711044306062558, "grad_norm": 0.844972550868988, "learning_rate": 7.937777482917441e-05, "loss": 3.0451, "step": 1172 }, { "epoch": 0.6722496600100207, "grad_norm": 0.6200757026672363, "learning_rate": 7.930112909903737e-05, "loss": 2.9982, "step": 1174 }, { "epoch": 0.6733948894137857, "grad_norm": 0.6621441841125488, "learning_rate": 7.922437835919059e-05, "loss": 3.0133, "step": 1176 }, { "epoch": 0.6745401188175506, "grad_norm": 0.6099239587783813, "learning_rate": 7.914752288469418e-05, "loss": 3.0359, "step": 1178 }, { "epoch": 0.6756853482213155, "grad_norm": 0.6415863633155823, "learning_rate": 7.907056295098367e-05, "loss": 3.0456, "step": 1180 }, { "epoch": 0.6768305776250805, "grad_norm": 0.5651492476463318, "learning_rate": 7.89934988338689e-05, "loss": 3.0138, "step": 1182 }, { "epoch": 0.6779758070288454, "grad_norm": 0.6899843215942383, "learning_rate": 7.891633080953309e-05, "loss": 3.1091, "step": 1184 }, { "epoch": 0.6791210364326105, "grad_norm": 0.6236230134963989, "learning_rate": 7.883905915453191e-05, "loss": 3.0477, "step": 1186 }, { "epoch": 0.6802662658363754, "grad_norm": 0.6632122993469238, "learning_rate": 7.876168414579232e-05, "loss": 3.0023, "step": 1188 }, { "epoch": 0.6814114952401403, "grad_norm": 0.5697975158691406, "learning_rate": 7.868420606061174e-05, "loss": 3.0046, "step": 1190 }, { "epoch": 0.6825567246439053, "grad_norm": 0.6847530603408813, "learning_rate": 7.8606625176657e-05, "loss": 3.0155, "step": 1192 }, { "epoch": 0.6837019540476702, "grad_norm": 0.6649438738822937, "learning_rate": 7.852894177196333e-05, "loss": 3.0616, "step": 1194 }, { "epoch": 0.6848471834514351, "grad_norm": 0.6214346289634705, "learning_rate": 7.845115612493335e-05, "loss": 2.963, "step": 1196 }, { "epoch": 0.6859924128552001, "grad_norm": 0.6587514877319336, "learning_rate": 7.837326851433614e-05, "loss": 3.0344, "step": 1198 }, { "epoch": 0.687137642258965, "grad_norm": 0.6454896330833435, "learning_rate": 7.829527921930617e-05, "loss": 3.0191, "step": 1200 }, { "epoch": 0.68828287166273, "grad_norm": 0.7053160071372986, "learning_rate": 7.821718851934235e-05, "loss": 2.9888, "step": 1202 }, { "epoch": 0.6894281010664949, "grad_norm": 0.6341421604156494, "learning_rate": 7.813899669430695e-05, "loss": 3.0688, "step": 1204 }, { "epoch": 0.6905733304702598, "grad_norm": 0.5567854642868042, "learning_rate": 7.806070402442476e-05, "loss": 3.0409, "step": 1206 }, { "epoch": 0.6917185598740248, "grad_norm": 0.5970193147659302, "learning_rate": 7.798231079028186e-05, "loss": 3.0569, "step": 1208 }, { "epoch": 0.6928637892777897, "grad_norm": 0.6818400621414185, "learning_rate": 7.79038172728248e-05, "loss": 2.9987, "step": 1210 }, { "epoch": 0.6940090186815546, "grad_norm": 0.7994418740272522, "learning_rate": 7.782522375335954e-05, "loss": 2.9835, "step": 1212 }, { "epoch": 0.6951542480853196, "grad_norm": 0.7498995065689087, "learning_rate": 7.774653051355039e-05, "loss": 3.0296, "step": 1214 }, { "epoch": 0.6962994774890845, "grad_norm": 0.6866205930709839, "learning_rate": 7.766773783541902e-05, "loss": 3.0301, "step": 1216 }, { "epoch": 0.6974447068928494, "grad_norm": 0.6896367073059082, "learning_rate": 7.758884600134354e-05, "loss": 3.0219, "step": 1218 }, { "epoch": 0.6985899362966144, "grad_norm": 0.668050229549408, "learning_rate": 7.750985529405736e-05, "loss": 3.0341, "step": 1220 }, { "epoch": 0.6997351657003793, "grad_norm": 0.666374146938324, "learning_rate": 7.743076599664824e-05, "loss": 3.0484, "step": 1222 }, { "epoch": 0.7008803951041443, "grad_norm": 0.7941027283668518, "learning_rate": 7.735157839255728e-05, "loss": 3.0032, "step": 1224 }, { "epoch": 0.7020256245079093, "grad_norm": 0.6927073001861572, "learning_rate": 7.727229276557791e-05, "loss": 3.0483, "step": 1226 }, { "epoch": 0.7031708539116742, "grad_norm": 0.6417405009269714, "learning_rate": 7.71929093998548e-05, "loss": 3.0354, "step": 1228 }, { "epoch": 0.7043160833154392, "grad_norm": 0.6400049328804016, "learning_rate": 7.711342857988295e-05, "loss": 3.013, "step": 1230 }, { "epoch": 0.7054613127192041, "grad_norm": 0.6526817083358765, "learning_rate": 7.703385059050662e-05, "loss": 3.1439, "step": 1232 }, { "epoch": 0.706606542122969, "grad_norm": 0.5762894749641418, "learning_rate": 7.695417571691825e-05, "loss": 2.9981, "step": 1234 }, { "epoch": 0.707751771526734, "grad_norm": 0.6319015622138977, "learning_rate": 7.687440424465755e-05, "loss": 3.0592, "step": 1236 }, { "epoch": 0.7088970009304989, "grad_norm": 0.6258828043937683, "learning_rate": 7.679453645961039e-05, "loss": 3.0415, "step": 1238 }, { "epoch": 0.7100422303342638, "grad_norm": 0.6156604290008545, "learning_rate": 7.671457264800784e-05, "loss": 3.0128, "step": 1240 }, { "epoch": 0.7111874597380288, "grad_norm": 0.6246117353439331, "learning_rate": 7.663451309642509e-05, "loss": 3.0324, "step": 1242 }, { "epoch": 0.7123326891417937, "grad_norm": 0.5683282017707825, "learning_rate": 7.65543580917804e-05, "loss": 2.9937, "step": 1244 }, { "epoch": 0.7134779185455586, "grad_norm": 0.7062090635299683, "learning_rate": 7.647410792133422e-05, "loss": 3.0414, "step": 1246 }, { "epoch": 0.7146231479493236, "grad_norm": 0.6515636444091797, "learning_rate": 7.639376287268798e-05, "loss": 3.029, "step": 1248 }, { "epoch": 0.7157683773530885, "grad_norm": 0.6117521524429321, "learning_rate": 7.631332323378314e-05, "loss": 3.0049, "step": 1250 }, { "epoch": 0.7169136067568534, "grad_norm": 0.7275700569152832, "learning_rate": 7.623278929290013e-05, "loss": 3.0346, "step": 1252 }, { "epoch": 0.7180588361606184, "grad_norm": 0.6181052923202515, "learning_rate": 7.615216133865744e-05, "loss": 2.9997, "step": 1254 }, { "epoch": 0.7192040655643833, "grad_norm": 0.5979108810424805, "learning_rate": 7.607143966001041e-05, "loss": 3.0351, "step": 1256 }, { "epoch": 0.7203492949681483, "grad_norm": 0.6783589720726013, "learning_rate": 7.599062454625023e-05, "loss": 2.9636, "step": 1258 }, { "epoch": 0.7214945243719133, "grad_norm": 0.6468908190727234, "learning_rate": 7.590971628700305e-05, "loss": 3.0285, "step": 1260 }, { "epoch": 0.7226397537756782, "grad_norm": 0.6515734195709229, "learning_rate": 7.582871517222876e-05, "loss": 3.0494, "step": 1262 }, { "epoch": 0.7237849831794432, "grad_norm": 0.6727483868598938, "learning_rate": 7.574762149222007e-05, "loss": 3.08, "step": 1264 }, { "epoch": 0.7249302125832081, "grad_norm": 0.70179283618927, "learning_rate": 7.566643553760138e-05, "loss": 2.9622, "step": 1266 }, { "epoch": 0.726075441986973, "grad_norm": 0.7478881478309631, "learning_rate": 7.558515759932782e-05, "loss": 3.0535, "step": 1268 }, { "epoch": 0.727220671390738, "grad_norm": 0.7690526247024536, "learning_rate": 7.550378796868417e-05, "loss": 3.0019, "step": 1270 }, { "epoch": 0.7283659007945029, "grad_norm": 0.6407628059387207, "learning_rate": 7.542232693728379e-05, "loss": 2.9642, "step": 1272 }, { "epoch": 0.7295111301982679, "grad_norm": 0.5795389413833618, "learning_rate": 7.534077479706764e-05, "loss": 3.0628, "step": 1274 }, { "epoch": 0.7306563596020328, "grad_norm": 0.6217789053916931, "learning_rate": 7.525913184030319e-05, "loss": 3.0264, "step": 1276 }, { "epoch": 0.7318015890057977, "grad_norm": 0.6992785930633545, "learning_rate": 7.517739835958335e-05, "loss": 3.0133, "step": 1278 }, { "epoch": 0.7329468184095627, "grad_norm": 0.6179572343826294, "learning_rate": 7.509557464782546e-05, "loss": 2.9636, "step": 1280 }, { "epoch": 0.7340920478133276, "grad_norm": 0.5889167189598083, "learning_rate": 7.501366099827025e-05, "loss": 3.0598, "step": 1282 }, { "epoch": 0.7352372772170925, "grad_norm": 0.6687774062156677, "learning_rate": 7.493165770448078e-05, "loss": 3.0248, "step": 1284 }, { "epoch": 0.7363825066208575, "grad_norm": 0.6092299818992615, "learning_rate": 7.484956506034136e-05, "loss": 3.0009, "step": 1286 }, { "epoch": 0.7375277360246224, "grad_norm": 0.5790627598762512, "learning_rate": 7.476738336005647e-05, "loss": 2.9792, "step": 1288 }, { "epoch": 0.7386729654283873, "grad_norm": 0.5962207913398743, "learning_rate": 7.468511289814983e-05, "loss": 2.9984, "step": 1290 }, { "epoch": 0.7398181948321523, "grad_norm": 0.6334844827651978, "learning_rate": 7.460275396946323e-05, "loss": 3.041, "step": 1292 }, { "epoch": 0.7409634242359172, "grad_norm": 0.6076721549034119, "learning_rate": 7.45203068691555e-05, "loss": 3.0255, "step": 1294 }, { "epoch": 0.7421086536396821, "grad_norm": 0.7529584169387817, "learning_rate": 7.443777189270147e-05, "loss": 3.0043, "step": 1296 }, { "epoch": 0.7432538830434472, "grad_norm": 0.6646864414215088, "learning_rate": 7.435514933589089e-05, "loss": 3.0181, "step": 1298 }, { "epoch": 0.7443991124472121, "grad_norm": 0.6356727480888367, "learning_rate": 7.427243949482741e-05, "loss": 3.0157, "step": 1300 }, { "epoch": 0.7455443418509771, "grad_norm": 0.5824810266494751, "learning_rate": 7.418964266592744e-05, "loss": 2.9286, "step": 1302 }, { "epoch": 0.746689571254742, "grad_norm": 0.6385902166366577, "learning_rate": 7.410675914591921e-05, "loss": 2.9807, "step": 1304 }, { "epoch": 0.7478348006585069, "grad_norm": 0.7312979102134705, "learning_rate": 7.402378923184156e-05, "loss": 3.0521, "step": 1306 }, { "epoch": 0.7489800300622719, "grad_norm": 0.5312052965164185, "learning_rate": 7.394073322104298e-05, "loss": 2.8997, "step": 1308 }, { "epoch": 0.7501252594660368, "grad_norm": 0.6032156348228455, "learning_rate": 7.385759141118055e-05, "loss": 2.9969, "step": 1310 }, { "epoch": 0.7512704888698017, "grad_norm": 0.6151460409164429, "learning_rate": 7.377436410021878e-05, "loss": 2.9889, "step": 1312 }, { "epoch": 0.7524157182735667, "grad_norm": 0.6497369408607483, "learning_rate": 7.369105158642863e-05, "loss": 3.0514, "step": 1314 }, { "epoch": 0.7535609476773316, "grad_norm": 0.5319530367851257, "learning_rate": 7.360765416838643e-05, "loss": 2.9673, "step": 1316 }, { "epoch": 0.7547061770810966, "grad_norm": 0.6640217900276184, "learning_rate": 7.352417214497272e-05, "loss": 2.9538, "step": 1318 }, { "epoch": 0.7558514064848615, "grad_norm": 0.5952315926551819, "learning_rate": 7.344060581537134e-05, "loss": 3.0166, "step": 1320 }, { "epoch": 0.7569966358886264, "grad_norm": 0.6607936024665833, "learning_rate": 7.335695547906821e-05, "loss": 3.053, "step": 1322 }, { "epoch": 0.7581418652923914, "grad_norm": 0.5945944786071777, "learning_rate": 7.327322143585033e-05, "loss": 2.9579, "step": 1324 }, { "epoch": 0.7592870946961563, "grad_norm": 0.5982474088668823, "learning_rate": 7.318940398580467e-05, "loss": 3.0381, "step": 1326 }, { "epoch": 0.7604323240999212, "grad_norm": 0.557156503200531, "learning_rate": 7.310550342931714e-05, "loss": 2.9517, "step": 1328 }, { "epoch": 0.7615775535036862, "grad_norm": 0.519133985042572, "learning_rate": 7.30215200670715e-05, "loss": 2.95, "step": 1330 }, { "epoch": 0.7627227829074511, "grad_norm": 0.6734493970870972, "learning_rate": 7.293745420004823e-05, "loss": 3.0136, "step": 1332 }, { "epoch": 0.763868012311216, "grad_norm": 0.6518685817718506, "learning_rate": 7.28533061295235e-05, "loss": 2.9696, "step": 1334 }, { "epoch": 0.7650132417149811, "grad_norm": 0.5936789512634277, "learning_rate": 7.276907615706814e-05, "loss": 2.9877, "step": 1336 }, { "epoch": 0.766158471118746, "grad_norm": 0.5671294331550598, "learning_rate": 7.268476458454642e-05, "loss": 2.9858, "step": 1338 }, { "epoch": 0.767303700522511, "grad_norm": 0.5697634816169739, "learning_rate": 7.260037171411508e-05, "loss": 2.9434, "step": 1340 }, { "epoch": 0.7684489299262759, "grad_norm": 0.6007529497146606, "learning_rate": 7.251589784822224e-05, "loss": 2.9387, "step": 1342 }, { "epoch": 0.7695941593300408, "grad_norm": 0.5928565263748169, "learning_rate": 7.243134328960625e-05, "loss": 2.9275, "step": 1344 }, { "epoch": 0.7707393887338058, "grad_norm": 0.6670402884483337, "learning_rate": 7.234670834129469e-05, "loss": 2.9812, "step": 1346 }, { "epoch": 0.7718846181375707, "grad_norm": 0.6268984079360962, "learning_rate": 7.226199330660322e-05, "loss": 2.9359, "step": 1348 }, { "epoch": 0.7730298475413356, "grad_norm": 0.5983556509017944, "learning_rate": 7.217719848913451e-05, "loss": 2.9524, "step": 1350 }, { "epoch": 0.7741750769451006, "grad_norm": 0.5522739291191101, "learning_rate": 7.209232419277714e-05, "loss": 2.9182, "step": 1352 }, { "epoch": 0.7753203063488655, "grad_norm": 0.5227957367897034, "learning_rate": 7.20073707217046e-05, "loss": 2.9703, "step": 1354 }, { "epoch": 0.7764655357526304, "grad_norm": 0.484824538230896, "learning_rate": 7.192233838037403e-05, "loss": 2.985, "step": 1356 }, { "epoch": 0.7776107651563954, "grad_norm": 0.5953473448753357, "learning_rate": 7.183722747352531e-05, "loss": 2.9942, "step": 1358 }, { "epoch": 0.7787559945601603, "grad_norm": 0.550989031791687, "learning_rate": 7.175203830617983e-05, "loss": 2.9782, "step": 1360 }, { "epoch": 0.7799012239639252, "grad_norm": 0.5300703048706055, "learning_rate": 7.166677118363945e-05, "loss": 2.9471, "step": 1362 }, { "epoch": 0.7810464533676902, "grad_norm": 0.6735565066337585, "learning_rate": 7.158142641148546e-05, "loss": 2.9581, "step": 1364 }, { "epoch": 0.7821916827714551, "grad_norm": 0.6232147216796875, "learning_rate": 7.149600429557734e-05, "loss": 2.9921, "step": 1366 }, { "epoch": 0.7833369121752201, "grad_norm": 0.563504159450531, "learning_rate": 7.14105051420519e-05, "loss": 2.9923, "step": 1368 }, { "epoch": 0.784482141578985, "grad_norm": 0.5748158097267151, "learning_rate": 7.132492925732187e-05, "loss": 3.0145, "step": 1370 }, { "epoch": 0.7856273709827499, "grad_norm": 0.5430135130882263, "learning_rate": 7.12392769480751e-05, "loss": 2.9535, "step": 1372 }, { "epoch": 0.786772600386515, "grad_norm": 0.6344389319419861, "learning_rate": 7.115354852127324e-05, "loss": 3.0599, "step": 1374 }, { "epoch": 0.7879178297902799, "grad_norm": 0.6093223094940186, "learning_rate": 7.106774428415079e-05, "loss": 3.0051, "step": 1376 }, { "epoch": 0.7890630591940448, "grad_norm": 0.5969236493110657, "learning_rate": 7.098186454421393e-05, "loss": 2.9106, "step": 1378 }, { "epoch": 0.7902082885978098, "grad_norm": 0.5839152336120605, "learning_rate": 7.089590960923943e-05, "loss": 2.9917, "step": 1380 }, { "epoch": 0.7913535180015747, "grad_norm": 0.6248955130577087, "learning_rate": 7.080987978727349e-05, "loss": 3.0072, "step": 1382 }, { "epoch": 0.7924987474053397, "grad_norm": 0.6621485352516174, "learning_rate": 7.072377538663079e-05, "loss": 2.9616, "step": 1384 }, { "epoch": 0.7936439768091046, "grad_norm": 0.5360151529312134, "learning_rate": 7.063759671589319e-05, "loss": 3.0115, "step": 1386 }, { "epoch": 0.7947892062128695, "grad_norm": 0.6625118851661682, "learning_rate": 7.055134408390877e-05, "loss": 3.0551, "step": 1388 }, { "epoch": 0.7959344356166345, "grad_norm": 0.5611973404884338, "learning_rate": 7.046501779979068e-05, "loss": 2.9427, "step": 1390 }, { "epoch": 0.7970796650203994, "grad_norm": 0.6025654673576355, "learning_rate": 7.037861817291598e-05, "loss": 2.985, "step": 1392 }, { "epoch": 0.7982248944241643, "grad_norm": 0.7166057229042053, "learning_rate": 7.029214551292465e-05, "loss": 3.006, "step": 1394 }, { "epoch": 0.7993701238279293, "grad_norm": 0.586137056350708, "learning_rate": 7.020560012971832e-05, "loss": 2.9835, "step": 1396 }, { "epoch": 0.8005153532316942, "grad_norm": 0.7084041833877563, "learning_rate": 7.011898233345931e-05, "loss": 2.9931, "step": 1398 }, { "epoch": 0.8016605826354591, "grad_norm": 0.6771812438964844, "learning_rate": 7.003229243456944e-05, "loss": 3.0061, "step": 1400 }, { "epoch": 0.8028058120392241, "grad_norm": 0.6903461813926697, "learning_rate": 6.994553074372891e-05, "loss": 2.977, "step": 1402 }, { "epoch": 0.803951041442989, "grad_norm": 0.6033274531364441, "learning_rate": 6.985869757187523e-05, "loss": 2.935, "step": 1404 }, { "epoch": 0.805096270846754, "grad_norm": 0.6533209085464478, "learning_rate": 6.977179323020207e-05, "loss": 3.0233, "step": 1406 }, { "epoch": 0.8062415002505189, "grad_norm": 0.5902218222618103, "learning_rate": 6.96848180301582e-05, "loss": 3.0012, "step": 1408 }, { "epoch": 0.8073867296542838, "grad_norm": 0.6266187429428101, "learning_rate": 6.959777228344628e-05, "loss": 2.9518, "step": 1410 }, { "epoch": 0.8085319590580489, "grad_norm": 0.5876622200012207, "learning_rate": 6.95106563020218e-05, "loss": 2.9377, "step": 1412 }, { "epoch": 0.8096771884618138, "grad_norm": 0.5977484583854675, "learning_rate": 6.942347039809201e-05, "loss": 2.9814, "step": 1414 }, { "epoch": 0.8108224178655787, "grad_norm": 0.64298015832901, "learning_rate": 6.933621488411468e-05, "loss": 3.0269, "step": 1416 }, { "epoch": 0.8119676472693437, "grad_norm": 0.5676386952400208, "learning_rate": 6.924889007279712e-05, "loss": 2.9237, "step": 1418 }, { "epoch": 0.8131128766731086, "grad_norm": 0.5827841758728027, "learning_rate": 6.916149627709494e-05, "loss": 2.867, "step": 1420 }, { "epoch": 0.8142581060768735, "grad_norm": 0.6391154527664185, "learning_rate": 6.907403381021097e-05, "loss": 2.9802, "step": 1422 }, { "epoch": 0.8154033354806385, "grad_norm": 0.5791281461715698, "learning_rate": 6.89865029855942e-05, "loss": 2.9945, "step": 1424 }, { "epoch": 0.8165485648844034, "grad_norm": 0.5360828638076782, "learning_rate": 6.88989041169385e-05, "loss": 2.9882, "step": 1426 }, { "epoch": 0.8176937942881684, "grad_norm": 0.5865568518638611, "learning_rate": 6.881123751818175e-05, "loss": 2.9096, "step": 1428 }, { "epoch": 0.8188390236919333, "grad_norm": 0.5686046481132507, "learning_rate": 6.87235035035044e-05, "loss": 3.0026, "step": 1430 }, { "epoch": 0.8199842530956982, "grad_norm": 0.6230041980743408, "learning_rate": 6.863570238732862e-05, "loss": 2.9601, "step": 1432 }, { "epoch": 0.8211294824994632, "grad_norm": 0.61517733335495, "learning_rate": 6.854783448431702e-05, "loss": 2.9286, "step": 1434 }, { "epoch": 0.8222747119032281, "grad_norm": 0.5126785635948181, "learning_rate": 6.845990010937152e-05, "loss": 2.9896, "step": 1436 }, { "epoch": 0.823419941306993, "grad_norm": 0.5101936459541321, "learning_rate": 6.837189957763234e-05, "loss": 2.9612, "step": 1438 }, { "epoch": 0.824565170710758, "grad_norm": 0.6595968008041382, "learning_rate": 6.828383320447675e-05, "loss": 3.0712, "step": 1440 }, { "epoch": 0.8257104001145229, "grad_norm": 0.5970346927642822, "learning_rate": 6.8195701305518e-05, "loss": 2.9534, "step": 1442 }, { "epoch": 0.8268556295182878, "grad_norm": 0.645318865776062, "learning_rate": 6.810750419660415e-05, "loss": 2.9673, "step": 1444 }, { "epoch": 0.8280008589220528, "grad_norm": 0.6072613596916199, "learning_rate": 6.801924219381695e-05, "loss": 2.9525, "step": 1446 }, { "epoch": 0.8291460883258178, "grad_norm": 0.5918760299682617, "learning_rate": 6.793091561347078e-05, "loss": 2.961, "step": 1448 }, { "epoch": 0.8302913177295828, "grad_norm": 0.6208258271217346, "learning_rate": 6.784252477211138e-05, "loss": 2.9874, "step": 1450 }, { "epoch": 0.8314365471333477, "grad_norm": 0.601787805557251, "learning_rate": 6.775406998651484e-05, "loss": 2.9412, "step": 1452 }, { "epoch": 0.8325817765371126, "grad_norm": 0.5628416538238525, "learning_rate": 6.76655515736864e-05, "loss": 2.9098, "step": 1454 }, { "epoch": 0.8337270059408776, "grad_norm": 0.559907078742981, "learning_rate": 6.757696985085931e-05, "loss": 3.0108, "step": 1456 }, { "epoch": 0.8348722353446425, "grad_norm": 0.5879938006401062, "learning_rate": 6.748832513549373e-05, "loss": 2.9376, "step": 1458 }, { "epoch": 0.8360174647484074, "grad_norm": 0.581619143486023, "learning_rate": 6.739961774527557e-05, "loss": 2.9491, "step": 1460 }, { "epoch": 0.8371626941521724, "grad_norm": 0.686090350151062, "learning_rate": 6.731084799811536e-05, "loss": 2.9458, "step": 1462 }, { "epoch": 0.8383079235559373, "grad_norm": 0.6862696409225464, "learning_rate": 6.72220162121471e-05, "loss": 3.0272, "step": 1464 }, { "epoch": 0.8394531529597022, "grad_norm": 0.5614147782325745, "learning_rate": 6.713312270572711e-05, "loss": 2.9941, "step": 1466 }, { "epoch": 0.8405983823634672, "grad_norm": 0.6119958162307739, "learning_rate": 6.704416779743295e-05, "loss": 2.9241, "step": 1468 }, { "epoch": 0.8417436117672321, "grad_norm": 0.5873327851295471, "learning_rate": 6.695515180606217e-05, "loss": 2.9539, "step": 1470 }, { "epoch": 0.842888841170997, "grad_norm": 0.5834409594535828, "learning_rate": 6.686607505063127e-05, "loss": 2.981, "step": 1472 }, { "epoch": 0.844034070574762, "grad_norm": 0.6550679206848145, "learning_rate": 6.677693785037452e-05, "loss": 2.9575, "step": 1474 }, { "epoch": 0.8451792999785269, "grad_norm": 0.7193329930305481, "learning_rate": 6.668774052474278e-05, "loss": 2.9882, "step": 1476 }, { "epoch": 0.8463245293822919, "grad_norm": 0.741956353187561, "learning_rate": 6.659848339340243e-05, "loss": 2.9814, "step": 1478 }, { "epoch": 0.8474697587860568, "grad_norm": 0.7412140369415283, "learning_rate": 6.650916677623415e-05, "loss": 2.9989, "step": 1480 }, { "epoch": 0.8486149881898217, "grad_norm": 0.709784984588623, "learning_rate": 6.641979099333183e-05, "loss": 2.9654, "step": 1482 }, { "epoch": 0.8497602175935867, "grad_norm": 0.6807482838630676, "learning_rate": 6.633035636500137e-05, "loss": 2.9919, "step": 1484 }, { "epoch": 0.8509054469973517, "grad_norm": 0.6146615147590637, "learning_rate": 6.62408632117596e-05, "loss": 2.9711, "step": 1486 }, { "epoch": 0.8520506764011166, "grad_norm": 0.4998982846736908, "learning_rate": 6.615131185433306e-05, "loss": 2.9859, "step": 1488 }, { "epoch": 0.8531959058048816, "grad_norm": 0.577151358127594, "learning_rate": 6.606170261365689e-05, "loss": 3.0008, "step": 1490 }, { "epoch": 0.8543411352086465, "grad_norm": 0.5823954939842224, "learning_rate": 6.597203581087367e-05, "loss": 2.9355, "step": 1492 }, { "epoch": 0.8554863646124115, "grad_norm": 0.5608075857162476, "learning_rate": 6.588231176733228e-05, "loss": 2.9701, "step": 1494 }, { "epoch": 0.8566315940161764, "grad_norm": 0.5515905618667603, "learning_rate": 6.579253080458676e-05, "loss": 2.9437, "step": 1496 }, { "epoch": 0.8577768234199413, "grad_norm": 0.5510479211807251, "learning_rate": 6.570269324439509e-05, "loss": 2.889, "step": 1498 }, { "epoch": 0.8589220528237063, "grad_norm": 0.5867220163345337, "learning_rate": 6.561279940871809e-05, "loss": 2.9781, "step": 1500 }, { "epoch": 0.8600672822274712, "grad_norm": 0.5506752133369446, "learning_rate": 6.552284961971834e-05, "loss": 2.9802, "step": 1502 }, { "epoch": 0.8612125116312361, "grad_norm": 0.6004698872566223, "learning_rate": 6.543284419975884e-05, "loss": 2.9445, "step": 1504 }, { "epoch": 0.8623577410350011, "grad_norm": 0.5422489047050476, "learning_rate": 6.5342783471402e-05, "loss": 2.9621, "step": 1506 }, { "epoch": 0.863502970438766, "grad_norm": 0.5704418420791626, "learning_rate": 6.52526677574085e-05, "loss": 2.9643, "step": 1508 }, { "epoch": 0.8646481998425309, "grad_norm": 0.6530454754829407, "learning_rate": 6.516249738073597e-05, "loss": 2.9549, "step": 1510 }, { "epoch": 0.8657934292462959, "grad_norm": 0.5491419434547424, "learning_rate": 6.507227266453806e-05, "loss": 2.9119, "step": 1512 }, { "epoch": 0.8669386586500608, "grad_norm": 0.5081238150596619, "learning_rate": 6.498199393216305e-05, "loss": 2.9302, "step": 1514 }, { "epoch": 0.8680838880538257, "grad_norm": 0.5380941033363342, "learning_rate": 6.48916615071529e-05, "loss": 2.8767, "step": 1516 }, { "epoch": 0.8692291174575907, "grad_norm": 0.5369439125061035, "learning_rate": 6.480127571324193e-05, "loss": 2.9488, "step": 1518 }, { "epoch": 0.8703743468613556, "grad_norm": 0.5196496248245239, "learning_rate": 6.471083687435575e-05, "loss": 2.9781, "step": 1520 }, { "epoch": 0.8715195762651206, "grad_norm": 0.5687289834022522, "learning_rate": 6.462034531461008e-05, "loss": 2.9714, "step": 1522 }, { "epoch": 0.8726648056688856, "grad_norm": 0.535955548286438, "learning_rate": 6.452980135830952e-05, "loss": 2.9167, "step": 1524 }, { "epoch": 0.8738100350726505, "grad_norm": 0.5874583721160889, "learning_rate": 6.443920532994658e-05, "loss": 2.9149, "step": 1526 }, { "epoch": 0.8749552644764155, "grad_norm": 0.6629985570907593, "learning_rate": 6.434855755420024e-05, "loss": 2.9535, "step": 1528 }, { "epoch": 0.8761004938801804, "grad_norm": 0.7059323787689209, "learning_rate": 6.425785835593503e-05, "loss": 2.9856, "step": 1530 }, { "epoch": 0.8772457232839453, "grad_norm": 0.6743423342704773, "learning_rate": 6.416710806019973e-05, "loss": 2.997, "step": 1532 }, { "epoch": 0.8783909526877103, "grad_norm": 0.5754982233047485, "learning_rate": 6.407630699222624e-05, "loss": 2.9411, "step": 1534 }, { "epoch": 0.8795361820914752, "grad_norm": 0.5872779488563538, "learning_rate": 6.398545547742846e-05, "loss": 2.9959, "step": 1536 }, { "epoch": 0.8806814114952402, "grad_norm": 0.5496954321861267, "learning_rate": 6.389455384140101e-05, "loss": 2.9483, "step": 1538 }, { "epoch": 0.8818266408990051, "grad_norm": 0.6256377100944519, "learning_rate": 6.380360240991821e-05, "loss": 2.8727, "step": 1540 }, { "epoch": 0.88297187030277, "grad_norm": 0.6160191893577576, "learning_rate": 6.37126015089328e-05, "loss": 2.9312, "step": 1542 }, { "epoch": 0.884117099706535, "grad_norm": 0.5502288937568665, "learning_rate": 6.362155146457478e-05, "loss": 3.0298, "step": 1544 }, { "epoch": 0.8852623291102999, "grad_norm": 0.5236011743545532, "learning_rate": 6.353045260315035e-05, "loss": 2.8955, "step": 1546 }, { "epoch": 0.8864075585140648, "grad_norm": 0.5943360924720764, "learning_rate": 6.34393052511406e-05, "loss": 2.9258, "step": 1548 }, { "epoch": 0.8875527879178298, "grad_norm": 0.5703767538070679, "learning_rate": 6.33481097352004e-05, "loss": 2.9084, "step": 1550 }, { "epoch": 0.8886980173215947, "grad_norm": 0.5543270707130432, "learning_rate": 6.325686638215724e-05, "loss": 2.9434, "step": 1552 }, { "epoch": 0.8898432467253596, "grad_norm": 0.6018961668014526, "learning_rate": 6.316557551901006e-05, "loss": 2.9177, "step": 1554 }, { "epoch": 0.8909884761291246, "grad_norm": 0.5374200940132141, "learning_rate": 6.307423747292811e-05, "loss": 2.9867, "step": 1556 }, { "epoch": 0.8921337055328895, "grad_norm": 0.6098986864089966, "learning_rate": 6.298285257124963e-05, "loss": 3.0203, "step": 1558 }, { "epoch": 0.8932789349366544, "grad_norm": 0.5418615937232971, "learning_rate": 6.289142114148085e-05, "loss": 2.9239, "step": 1560 }, { "epoch": 0.8944241643404195, "grad_norm": 0.5681843757629395, "learning_rate": 6.279994351129476e-05, "loss": 2.9077, "step": 1562 }, { "epoch": 0.8955693937441844, "grad_norm": 0.5646500587463379, "learning_rate": 6.270842000852988e-05, "loss": 2.9635, "step": 1564 }, { "epoch": 0.8967146231479494, "grad_norm": 0.513608455657959, "learning_rate": 6.261685096118917e-05, "loss": 2.9215, "step": 1566 }, { "epoch": 0.8978598525517143, "grad_norm": 0.5145309567451477, "learning_rate": 6.252523669743876e-05, "loss": 3.0064, "step": 1568 }, { "epoch": 0.8990050819554792, "grad_norm": 0.5833147168159485, "learning_rate": 6.243357754560688e-05, "loss": 2.9831, "step": 1570 }, { "epoch": 0.9001503113592442, "grad_norm": 0.5844488739967346, "learning_rate": 6.23418738341826e-05, "loss": 2.9308, "step": 1572 }, { "epoch": 0.9012955407630091, "grad_norm": 0.5789006352424622, "learning_rate": 6.225012589181471e-05, "loss": 2.9829, "step": 1574 }, { "epoch": 0.902440770166774, "grad_norm": 0.620063304901123, "learning_rate": 6.21583340473105e-05, "loss": 2.9039, "step": 1576 }, { "epoch": 0.903585999570539, "grad_norm": 0.6860834956169128, "learning_rate": 6.206649862963457e-05, "loss": 2.9643, "step": 1578 }, { "epoch": 0.9047312289743039, "grad_norm": 0.5990268588066101, "learning_rate": 6.19746199679077e-05, "loss": 2.9227, "step": 1580 }, { "epoch": 0.9058764583780688, "grad_norm": 0.549895167350769, "learning_rate": 6.188269839140569e-05, "loss": 2.9304, "step": 1582 }, { "epoch": 0.9070216877818338, "grad_norm": 0.5849148631095886, "learning_rate": 6.179073422955806e-05, "loss": 2.9216, "step": 1584 }, { "epoch": 0.9081669171855987, "grad_norm": 0.5945584177970886, "learning_rate": 6.169872781194701e-05, "loss": 2.9938, "step": 1586 }, { "epoch": 0.9093121465893637, "grad_norm": 0.5605809688568115, "learning_rate": 6.160667946830616e-05, "loss": 2.9508, "step": 1588 }, { "epoch": 0.9104573759931286, "grad_norm": 0.641635537147522, "learning_rate": 6.151458952851935e-05, "loss": 2.9194, "step": 1590 }, { "epoch": 0.9116026053968935, "grad_norm": 0.6415832042694092, "learning_rate": 6.142245832261956e-05, "loss": 2.9365, "step": 1592 }, { "epoch": 0.9127478348006585, "grad_norm": 0.5336154103279114, "learning_rate": 6.133028618078759e-05, "loss": 2.9021, "step": 1594 }, { "epoch": 0.9138930642044234, "grad_norm": 0.5383312702178955, "learning_rate": 6.1238073433351e-05, "loss": 2.8608, "step": 1596 }, { "epoch": 0.9150382936081883, "grad_norm": 0.5748524069786072, "learning_rate": 6.114582041078285e-05, "loss": 2.9156, "step": 1598 }, { "epoch": 0.9161835230119534, "grad_norm": 0.5506983995437622, "learning_rate": 6.105352744370053e-05, "loss": 2.9455, "step": 1600 }, { "epoch": 0.9173287524157183, "grad_norm": 0.5026014447212219, "learning_rate": 6.09611948628646e-05, "loss": 2.9324, "step": 1602 }, { "epoch": 0.9184739818194833, "grad_norm": 0.5291290879249573, "learning_rate": 6.086882299917758e-05, "loss": 2.9622, "step": 1604 }, { "epoch": 0.9196192112232482, "grad_norm": 0.5395295023918152, "learning_rate": 6.077641218368276e-05, "loss": 2.8979, "step": 1606 }, { "epoch": 0.9207644406270131, "grad_norm": 0.5202158093452454, "learning_rate": 6.068396274756306e-05, "loss": 2.8641, "step": 1608 }, { "epoch": 0.9219096700307781, "grad_norm": 0.4785449206829071, "learning_rate": 6.05914750221398e-05, "loss": 2.9755, "step": 1610 }, { "epoch": 0.923054899434543, "grad_norm": 0.4841679632663727, "learning_rate": 6.0498949338871494e-05, "loss": 2.9413, "step": 1612 }, { "epoch": 0.9242001288383079, "grad_norm": 0.512946367263794, "learning_rate": 6.040638602935268e-05, "loss": 2.954, "step": 1614 }, { "epoch": 0.9253453582420729, "grad_norm": 0.6236635446548462, "learning_rate": 6.031378542531282e-05, "loss": 3.0108, "step": 1616 }, { "epoch": 0.9264905876458378, "grad_norm": 0.6436796188354492, "learning_rate": 6.0221147858614944e-05, "loss": 2.9034, "step": 1618 }, { "epoch": 0.9276358170496027, "grad_norm": 0.6704596281051636, "learning_rate": 6.0128473661254605e-05, "loss": 2.9564, "step": 1620 }, { "epoch": 0.9287810464533677, "grad_norm": 0.5924867987632751, "learning_rate": 6.00357631653586e-05, "loss": 3.0252, "step": 1622 }, { "epoch": 0.9299262758571326, "grad_norm": 0.5814377069473267, "learning_rate": 5.994301670318385e-05, "loss": 2.907, "step": 1624 }, { "epoch": 0.9310715052608975, "grad_norm": 0.7266978621482849, "learning_rate": 5.985023460711612e-05, "loss": 2.9158, "step": 1626 }, { "epoch": 0.9322167346646625, "grad_norm": 0.6032978892326355, "learning_rate": 5.975741720966892e-05, "loss": 2.9025, "step": 1628 }, { "epoch": 0.9333619640684274, "grad_norm": 0.5802534818649292, "learning_rate": 5.966456484348226e-05, "loss": 2.9143, "step": 1630 }, { "epoch": 0.9345071934721924, "grad_norm": 0.6050094366073608, "learning_rate": 5.9571677841321494e-05, "loss": 2.9139, "step": 1632 }, { "epoch": 0.9356524228759573, "grad_norm": 0.6297913789749146, "learning_rate": 5.947875653607606e-05, "loss": 2.963, "step": 1634 }, { "epoch": 0.9367976522797223, "grad_norm": 0.5898680686950684, "learning_rate": 5.938580126075838e-05, "loss": 2.9342, "step": 1636 }, { "epoch": 0.9379428816834873, "grad_norm": 0.5346818566322327, "learning_rate": 5.929281234850257e-05, "loss": 2.9321, "step": 1638 }, { "epoch": 0.9390881110872522, "grad_norm": 0.5342332720756531, "learning_rate": 5.919979013256335e-05, "loss": 2.8957, "step": 1640 }, { "epoch": 0.9402333404910171, "grad_norm": 0.5973726511001587, "learning_rate": 5.910673494631474e-05, "loss": 2.9555, "step": 1642 }, { "epoch": 0.9413785698947821, "grad_norm": 0.6567181348800659, "learning_rate": 5.901364712324894e-05, "loss": 2.9272, "step": 1644 }, { "epoch": 0.942523799298547, "grad_norm": 0.6484449505805969, "learning_rate": 5.892052699697514e-05, "loss": 2.9415, "step": 1646 }, { "epoch": 0.943669028702312, "grad_norm": 0.5099656581878662, "learning_rate": 5.8827374901218256e-05, "loss": 2.927, "step": 1648 }, { "epoch": 0.9448142581060769, "grad_norm": 0.5123404860496521, "learning_rate": 5.873419116981782e-05, "loss": 2.8556, "step": 1650 }, { "epoch": 0.9459594875098418, "grad_norm": 0.6220594644546509, "learning_rate": 5.864097613672669e-05, "loss": 2.9163, "step": 1652 }, { "epoch": 0.9471047169136068, "grad_norm": 0.5449616312980652, "learning_rate": 5.854773013600993e-05, "loss": 2.9281, "step": 1654 }, { "epoch": 0.9482499463173717, "grad_norm": 0.5175302028656006, "learning_rate": 5.845445350184361e-05, "loss": 2.9238, "step": 1656 }, { "epoch": 0.9493951757211366, "grad_norm": 0.5361518859863281, "learning_rate": 5.836114656851354e-05, "loss": 2.8362, "step": 1658 }, { "epoch": 0.9505404051249016, "grad_norm": 0.5278197526931763, "learning_rate": 5.826780967041415e-05, "loss": 2.9431, "step": 1660 }, { "epoch": 0.9516856345286665, "grad_norm": 0.49755188822746277, "learning_rate": 5.817444314204725e-05, "loss": 2.9196, "step": 1662 }, { "epoch": 0.9528308639324314, "grad_norm": 0.5488353967666626, "learning_rate": 5.808104731802081e-05, "loss": 2.9003, "step": 1664 }, { "epoch": 0.9539760933361964, "grad_norm": 0.5349782109260559, "learning_rate": 5.7987622533047836e-05, "loss": 2.9372, "step": 1666 }, { "epoch": 0.9551213227399613, "grad_norm": 0.5536332726478577, "learning_rate": 5.7894169121945084e-05, "loss": 2.8327, "step": 1668 }, { "epoch": 0.9562665521437262, "grad_norm": 0.5771124362945557, "learning_rate": 5.780068741963195e-05, "loss": 2.9378, "step": 1670 }, { "epoch": 0.9574117815474912, "grad_norm": 0.5738604664802551, "learning_rate": 5.770717776112917e-05, "loss": 2.9942, "step": 1672 }, { "epoch": 0.9585570109512562, "grad_norm": 0.5480334162712097, "learning_rate": 5.7613640481557695e-05, "loss": 2.9512, "step": 1674 }, { "epoch": 0.9597022403550212, "grad_norm": 0.5497896075248718, "learning_rate": 5.752007591613745e-05, "loss": 2.8757, "step": 1676 }, { "epoch": 0.9608474697587861, "grad_norm": 0.5670433044433594, "learning_rate": 5.7426484400186163e-05, "loss": 2.9154, "step": 1678 }, { "epoch": 0.961992699162551, "grad_norm": 0.5624505281448364, "learning_rate": 5.7332866269118144e-05, "loss": 2.8875, "step": 1680 }, { "epoch": 0.963137928566316, "grad_norm": 0.5293657779693604, "learning_rate": 5.7239221858443094e-05, "loss": 2.9278, "step": 1682 }, { "epoch": 0.9642831579700809, "grad_norm": 0.509075939655304, "learning_rate": 5.714555150376486e-05, "loss": 2.8979, "step": 1684 }, { "epoch": 0.9654283873738458, "grad_norm": 0.5095922946929932, "learning_rate": 5.705185554078031e-05, "loss": 2.9111, "step": 1686 }, { "epoch": 0.9665736167776108, "grad_norm": 0.5343554019927979, "learning_rate": 5.695813430527805e-05, "loss": 2.9371, "step": 1688 }, { "epoch": 0.9677188461813757, "grad_norm": 0.5610002279281616, "learning_rate": 5.686438813313733e-05, "loss": 2.9488, "step": 1690 }, { "epoch": 0.9688640755851406, "grad_norm": 0.588938295841217, "learning_rate": 5.677061736032666e-05, "loss": 2.9073, "step": 1692 }, { "epoch": 0.9700093049889056, "grad_norm": 0.5782492756843567, "learning_rate": 5.6676822322902776e-05, "loss": 2.8714, "step": 1694 }, { "epoch": 0.9711545343926705, "grad_norm": 0.5729259848594666, "learning_rate": 5.658300335700941e-05, "loss": 2.8974, "step": 1696 }, { "epoch": 0.9722997637964355, "grad_norm": 0.5503959655761719, "learning_rate": 5.648916079887597e-05, "loss": 2.8727, "step": 1698 }, { "epoch": 0.9734449932002004, "grad_norm": 0.5312175750732422, "learning_rate": 5.639529498481646e-05, "loss": 2.8531, "step": 1700 }, { "epoch": 0.9745902226039653, "grad_norm": 0.5270197987556458, "learning_rate": 5.6301406251228216e-05, "loss": 2.9038, "step": 1702 }, { "epoch": 0.9757354520077303, "grad_norm": 0.48052453994750977, "learning_rate": 5.620749493459073e-05, "loss": 2.9239, "step": 1704 }, { "epoch": 0.9768806814114952, "grad_norm": 0.5059691667556763, "learning_rate": 5.6113561371464406e-05, "loss": 2.8952, "step": 1706 }, { "epoch": 0.9780259108152601, "grad_norm": 0.4924282133579254, "learning_rate": 5.601960589848937e-05, "loss": 2.9792, "step": 1708 }, { "epoch": 0.9791711402190251, "grad_norm": 0.4471040964126587, "learning_rate": 5.5925628852384314e-05, "loss": 2.9129, "step": 1710 }, { "epoch": 0.9803163696227901, "grad_norm": 0.49713778495788574, "learning_rate": 5.583163056994519e-05, "loss": 2.9044, "step": 1712 }, { "epoch": 0.981461599026555, "grad_norm": 0.5139912962913513, "learning_rate": 5.5737611388044086e-05, "loss": 2.9226, "step": 1714 }, { "epoch": 0.98260682843032, "grad_norm": 0.5141741037368774, "learning_rate": 5.564357164362799e-05, "loss": 2.9226, "step": 1716 }, { "epoch": 0.9837520578340849, "grad_norm": 0.4590401351451874, "learning_rate": 5.5549511673717556e-05, "loss": 2.8665, "step": 1718 }, { "epoch": 0.9848972872378499, "grad_norm": 0.4575752913951874, "learning_rate": 5.545543181540598e-05, "loss": 2.9321, "step": 1720 }, { "epoch": 0.9860425166416148, "grad_norm": 0.5326718091964722, "learning_rate": 5.5361332405857655e-05, "loss": 2.9199, "step": 1722 }, { "epoch": 0.9871877460453797, "grad_norm": 0.543998658657074, "learning_rate": 5.52672137823071e-05, "loss": 2.8879, "step": 1724 }, { "epoch": 0.9883329754491447, "grad_norm": 0.4667711555957794, "learning_rate": 5.517307628205769e-05, "loss": 2.9111, "step": 1726 }, { "epoch": 0.9894782048529096, "grad_norm": 0.5030423402786255, "learning_rate": 5.50789202424804e-05, "loss": 2.8846, "step": 1728 }, { "epoch": 0.9906234342566745, "grad_norm": 0.5402343273162842, "learning_rate": 5.498474600101272e-05, "loss": 2.9415, "step": 1730 }, { "epoch": 0.9917686636604395, "grad_norm": 0.5290342569351196, "learning_rate": 5.489055389515732e-05, "loss": 2.9221, "step": 1732 }, { "epoch": 0.9929138930642044, "grad_norm": 0.5294204354286194, "learning_rate": 5.4796344262480904e-05, "loss": 2.9049, "step": 1734 }, { "epoch": 0.9940591224679693, "grad_norm": 0.5088786482810974, "learning_rate": 5.470211744061301e-05, "loss": 2.8881, "step": 1736 }, { "epoch": 0.9952043518717343, "grad_norm": 0.5132361650466919, "learning_rate": 5.460787376724474e-05, "loss": 2.9463, "step": 1738 }, { "epoch": 0.9963495812754992, "grad_norm": 0.5157156586647034, "learning_rate": 5.451361358012763e-05, "loss": 2.8873, "step": 1740 }, { "epoch": 0.9974948106792642, "grad_norm": 0.5482271909713745, "learning_rate": 5.441933721707236e-05, "loss": 2.9052, "step": 1742 }, { "epoch": 0.9986400400830291, "grad_norm": 0.49170711636543274, "learning_rate": 5.432504501594763e-05, "loss": 2.878, "step": 1744 }, { "epoch": 0.999785269486794, "grad_norm": 0.5055040717124939, "learning_rate": 5.423073731467885e-05, "loss": 2.9521, "step": 1746 }, { "epoch": 1.0005726147018825, "grad_norm": 0.6301170587539673, "learning_rate": 5.4136414451246995e-05, "loss": 2.8161, "step": 1748 }, { "epoch": 1.0017178441056473, "grad_norm": 0.577078104019165, "learning_rate": 5.40420767636874e-05, "loss": 2.854, "step": 1750 }, { "epoch": 1.0028630735094124, "grad_norm": 0.5432089567184448, "learning_rate": 5.3947724590088475e-05, "loss": 2.905, "step": 1752 }, { "epoch": 1.0040083029131772, "grad_norm": 0.5131680369377136, "learning_rate": 5.3853358268590624e-05, "loss": 2.8725, "step": 1754 }, { "epoch": 1.0051535323169423, "grad_norm": 0.5810659527778625, "learning_rate": 5.37589781373849e-05, "loss": 2.9462, "step": 1756 }, { "epoch": 1.006298761720707, "grad_norm": 0.5084631443023682, "learning_rate": 5.366458453471184e-05, "loss": 2.7939, "step": 1758 }, { "epoch": 1.0074439911244721, "grad_norm": 0.49303531646728516, "learning_rate": 5.35701777988603e-05, "loss": 2.8416, "step": 1760 }, { "epoch": 1.008589220528237, "grad_norm": 0.46372827887535095, "learning_rate": 5.3475758268166164e-05, "loss": 2.8337, "step": 1762 }, { "epoch": 1.009734449932002, "grad_norm": 0.47583991289138794, "learning_rate": 5.3381326281011204e-05, "loss": 2.8772, "step": 1764 }, { "epoch": 1.0108796793357668, "grad_norm": 0.5093927383422852, "learning_rate": 5.328688217582182e-05, "loss": 2.8147, "step": 1766 }, { "epoch": 1.0120249087395319, "grad_norm": 0.5356534719467163, "learning_rate": 5.3192426291067795e-05, "loss": 2.8479, "step": 1768 }, { "epoch": 1.013170138143297, "grad_norm": 0.5348360538482666, "learning_rate": 5.309795896526124e-05, "loss": 2.9173, "step": 1770 }, { "epoch": 1.0143153675470618, "grad_norm": 0.5063700675964355, "learning_rate": 5.300348053695515e-05, "loss": 2.8603, "step": 1772 }, { "epoch": 1.0154605969508268, "grad_norm": 0.5169893503189087, "learning_rate": 5.2908991344742375e-05, "loss": 2.8093, "step": 1774 }, { "epoch": 1.0166058263545916, "grad_norm": 0.5480731725692749, "learning_rate": 5.281449172725433e-05, "loss": 2.8459, "step": 1776 }, { "epoch": 1.0177510557583567, "grad_norm": 0.5607473850250244, "learning_rate": 5.2719982023159765e-05, "loss": 2.8449, "step": 1778 }, { "epoch": 1.0188962851621215, "grad_norm": 0.5521141290664673, "learning_rate": 5.262546257116362e-05, "loss": 2.8801, "step": 1780 }, { "epoch": 1.0200415145658865, "grad_norm": 0.5414731502532959, "learning_rate": 5.2530933710005736e-05, "loss": 2.8596, "step": 1782 }, { "epoch": 1.0211867439696514, "grad_norm": 0.5596434473991394, "learning_rate": 5.243639577845971e-05, "loss": 2.8818, "step": 1784 }, { "epoch": 1.0223319733734164, "grad_norm": 0.5274948477745056, "learning_rate": 5.234184911533161e-05, "loss": 2.8583, "step": 1786 }, { "epoch": 1.0234772027771812, "grad_norm": 0.5410761833190918, "learning_rate": 5.224729405945879e-05, "loss": 2.9212, "step": 1788 }, { "epoch": 1.0246224321809463, "grad_norm": 0.5315967202186584, "learning_rate": 5.215273094970876e-05, "loss": 2.8735, "step": 1790 }, { "epoch": 1.025767661584711, "grad_norm": 0.5669625997543335, "learning_rate": 5.205816012497777e-05, "loss": 2.914, "step": 1792 }, { "epoch": 1.0269128909884762, "grad_norm": 0.6059103012084961, "learning_rate": 5.196358192418983e-05, "loss": 2.9223, "step": 1794 }, { "epoch": 1.028058120392241, "grad_norm": 0.5729017853736877, "learning_rate": 5.186899668629532e-05, "loss": 2.8927, "step": 1796 }, { "epoch": 1.029203349796006, "grad_norm": 0.5530718564987183, "learning_rate": 5.1774404750269876e-05, "loss": 2.8323, "step": 1798 }, { "epoch": 1.0303485791997709, "grad_norm": 0.5186034440994263, "learning_rate": 5.167980645511311e-05, "loss": 2.8628, "step": 1800 }, { "epoch": 1.031493808603536, "grad_norm": 0.504193902015686, "learning_rate": 5.1585202139847424e-05, "loss": 2.8289, "step": 1802 }, { "epoch": 1.032639038007301, "grad_norm": 0.48196157813072205, "learning_rate": 5.149059214351683e-05, "loss": 2.8307, "step": 1804 }, { "epoch": 1.0337842674110658, "grad_norm": 0.5442152619361877, "learning_rate": 5.139597680518564e-05, "loss": 2.8902, "step": 1806 }, { "epoch": 1.0349294968148308, "grad_norm": 0.4562651813030243, "learning_rate": 5.130135646393739e-05, "loss": 2.8007, "step": 1808 }, { "epoch": 1.0360747262185956, "grad_norm": 0.519249439239502, "learning_rate": 5.120673145887349e-05, "loss": 2.8248, "step": 1810 }, { "epoch": 1.0372199556223607, "grad_norm": 0.5793293714523315, "learning_rate": 5.1112102129112074e-05, "loss": 2.8999, "step": 1812 }, { "epoch": 1.0383651850261255, "grad_norm": 0.5099198818206787, "learning_rate": 5.101746881378677e-05, "loss": 2.875, "step": 1814 }, { "epoch": 1.0395104144298906, "grad_norm": 0.527511715888977, "learning_rate": 5.09228318520455e-05, "loss": 2.8651, "step": 1816 }, { "epoch": 1.0406556438336554, "grad_norm": 0.5045714974403381, "learning_rate": 5.0828191583049265e-05, "loss": 2.85, "step": 1818 }, { "epoch": 1.0418008732374204, "grad_norm": 0.556731104850769, "learning_rate": 5.073354834597091e-05, "loss": 2.9094, "step": 1820 }, { "epoch": 1.0429461026411853, "grad_norm": 0.5212239027023315, "learning_rate": 5.0638902479993886e-05, "loss": 2.7986, "step": 1822 }, { "epoch": 1.0440913320449503, "grad_norm": 0.5161175727844238, "learning_rate": 5.0544254324311105e-05, "loss": 2.8411, "step": 1824 }, { "epoch": 1.0452365614487151, "grad_norm": 0.4778583347797394, "learning_rate": 5.044960421812367e-05, "loss": 2.8226, "step": 1826 }, { "epoch": 1.0463817908524802, "grad_norm": 0.4907243251800537, "learning_rate": 5.0354952500639674e-05, "loss": 2.8726, "step": 1828 }, { "epoch": 1.047527020256245, "grad_norm": 0.4880329966545105, "learning_rate": 5.0260299511073003e-05, "loss": 2.8798, "step": 1830 }, { "epoch": 1.04867224966001, "grad_norm": 0.5149087309837341, "learning_rate": 5.016564558864205e-05, "loss": 2.8483, "step": 1832 }, { "epoch": 1.0498174790637749, "grad_norm": 0.5067439675331116, "learning_rate": 5.007099107256863e-05, "loss": 2.8022, "step": 1834 }, { "epoch": 1.05096270846754, "grad_norm": 0.506746768951416, "learning_rate": 4.9976336302076604e-05, "loss": 2.8279, "step": 1836 }, { "epoch": 1.0521079378713047, "grad_norm": 0.49440717697143555, "learning_rate": 4.988168161639081e-05, "loss": 2.8973, "step": 1838 }, { "epoch": 1.0532531672750698, "grad_norm": 0.45895057916641235, "learning_rate": 4.9787027354735755e-05, "loss": 2.8878, "step": 1840 }, { "epoch": 1.0543983966788346, "grad_norm": 0.5206420421600342, "learning_rate": 4.9692373856334404e-05, "loss": 2.8554, "step": 1842 }, { "epoch": 1.0555436260825997, "grad_norm": 0.49703797698020935, "learning_rate": 4.9597721460407014e-05, "loss": 2.8716, "step": 1844 }, { "epoch": 1.0566888554863647, "grad_norm": 0.4677201807498932, "learning_rate": 4.9503070506169896e-05, "loss": 2.8888, "step": 1846 }, { "epoch": 1.0578340848901295, "grad_norm": 0.49985969066619873, "learning_rate": 4.940842133283419e-05, "loss": 2.896, "step": 1848 }, { "epoch": 1.0589793142938946, "grad_norm": 0.5004621744155884, "learning_rate": 4.931377427960464e-05, "loss": 2.9012, "step": 1850 }, { "epoch": 1.0601245436976594, "grad_norm": 0.45411524176597595, "learning_rate": 4.921912968567839e-05, "loss": 2.852, "step": 1852 }, { "epoch": 1.0612697731014245, "grad_norm": 0.5539801716804504, "learning_rate": 4.912448789024378e-05, "loss": 2.8653, "step": 1854 }, { "epoch": 1.0624150025051893, "grad_norm": 0.566481351852417, "learning_rate": 4.902984923247914e-05, "loss": 2.8847, "step": 1856 }, { "epoch": 1.0635602319089543, "grad_norm": 0.503288209438324, "learning_rate": 4.893521405155153e-05, "loss": 2.9068, "step": 1858 }, { "epoch": 1.0647054613127191, "grad_norm": 0.5400659441947937, "learning_rate": 4.884058268661555e-05, "loss": 2.8766, "step": 1860 }, { "epoch": 1.0658506907164842, "grad_norm": 0.507279634475708, "learning_rate": 4.8745955476812126e-05, "loss": 2.8745, "step": 1862 }, { "epoch": 1.066995920120249, "grad_norm": 0.4649648070335388, "learning_rate": 4.8651332761267284e-05, "loss": 2.8027, "step": 1864 }, { "epoch": 1.068141149524014, "grad_norm": 0.49418357014656067, "learning_rate": 4.855671487909098e-05, "loss": 2.778, "step": 1866 }, { "epoch": 1.069286378927779, "grad_norm": 0.5281592607498169, "learning_rate": 4.8462102169375836e-05, "loss": 2.8031, "step": 1868 }, { "epoch": 1.070431608331544, "grad_norm": 0.5579238533973694, "learning_rate": 4.83674949711959e-05, "loss": 2.8735, "step": 1870 }, { "epoch": 1.0715768377353088, "grad_norm": 0.5268048048019409, "learning_rate": 4.8272893623605494e-05, "loss": 2.8672, "step": 1872 }, { "epoch": 1.0727220671390738, "grad_norm": 0.49692845344543457, "learning_rate": 4.8178298465637966e-05, "loss": 2.8589, "step": 1874 }, { "epoch": 1.0738672965428386, "grad_norm": 0.5442864298820496, "learning_rate": 4.808370983630451e-05, "loss": 2.8131, "step": 1876 }, { "epoch": 1.0750125259466037, "grad_norm": 0.5363568067550659, "learning_rate": 4.798912807459288e-05, "loss": 2.8647, "step": 1878 }, { "epoch": 1.0761577553503687, "grad_norm": 0.5348667502403259, "learning_rate": 4.789455351946625e-05, "loss": 2.9362, "step": 1880 }, { "epoch": 1.0773029847541336, "grad_norm": 0.4977494478225708, "learning_rate": 4.7799986509861946e-05, "loss": 2.8651, "step": 1882 }, { "epoch": 1.0784482141578986, "grad_norm": 0.5153520107269287, "learning_rate": 4.770542738469024e-05, "loss": 2.8881, "step": 1884 }, { "epoch": 1.0795934435616634, "grad_norm": 0.5237112045288086, "learning_rate": 4.761087648283321e-05, "loss": 2.8173, "step": 1886 }, { "epoch": 1.0807386729654285, "grad_norm": 0.5062981843948364, "learning_rate": 4.75163341431434e-05, "loss": 2.8825, "step": 1888 }, { "epoch": 1.0818839023691933, "grad_norm": 0.4902750849723816, "learning_rate": 4.742180070444269e-05, "loss": 2.8563, "step": 1890 }, { "epoch": 1.0830291317729583, "grad_norm": 0.5393232107162476, "learning_rate": 4.7327276505521065e-05, "loss": 2.8537, "step": 1892 }, { "epoch": 1.0841743611767232, "grad_norm": 0.5459491610527039, "learning_rate": 4.7232761885135375e-05, "loss": 2.8856, "step": 1894 }, { "epoch": 1.0853195905804882, "grad_norm": 0.5010082125663757, "learning_rate": 4.713825718200818e-05, "loss": 2.9151, "step": 1896 }, { "epoch": 1.086464819984253, "grad_norm": 0.47848883271217346, "learning_rate": 4.704376273482648e-05, "loss": 2.8515, "step": 1898 }, { "epoch": 1.087610049388018, "grad_norm": 0.45873889327049255, "learning_rate": 4.694927888224051e-05, "loss": 2.8691, "step": 1900 }, { "epoch": 1.088755278791783, "grad_norm": 0.4366936683654785, "learning_rate": 4.685480596286254e-05, "loss": 2.8606, "step": 1902 }, { "epoch": 1.089900508195548, "grad_norm": 0.43981027603149414, "learning_rate": 4.676034431526565e-05, "loss": 2.8425, "step": 1904 }, { "epoch": 1.0910457375993128, "grad_norm": 0.44349777698516846, "learning_rate": 4.666589427798256e-05, "loss": 2.8373, "step": 1906 }, { "epoch": 1.0921909670030778, "grad_norm": 0.45012906193733215, "learning_rate": 4.657145618950434e-05, "loss": 2.7538, "step": 1908 }, { "epoch": 1.0933361964068427, "grad_norm": 0.47416791319847107, "learning_rate": 4.6477030388279264e-05, "loss": 2.8737, "step": 1910 }, { "epoch": 1.0944814258106077, "grad_norm": 0.4794783592224121, "learning_rate": 4.638261721271155e-05, "loss": 2.8695, "step": 1912 }, { "epoch": 1.0956266552143727, "grad_norm": 0.48366686701774597, "learning_rate": 4.628821700116016e-05, "loss": 2.8048, "step": 1914 }, { "epoch": 1.0967718846181376, "grad_norm": 0.5032996535301208, "learning_rate": 4.6193830091937654e-05, "loss": 2.9137, "step": 1916 }, { "epoch": 1.0979171140219024, "grad_norm": 0.5226159691810608, "learning_rate": 4.6099456823308856e-05, "loss": 2.8601, "step": 1918 }, { "epoch": 1.0990623434256674, "grad_norm": 0.47801947593688965, "learning_rate": 4.600509753348974e-05, "loss": 2.8837, "step": 1920 }, { "epoch": 1.1002075728294325, "grad_norm": 0.5159834027290344, "learning_rate": 4.591075256064615e-05, "loss": 2.8732, "step": 1922 }, { "epoch": 1.1013528022331973, "grad_norm": 0.5375920534133911, "learning_rate": 4.581642224289265e-05, "loss": 2.9112, "step": 1924 }, { "epoch": 1.1024980316369624, "grad_norm": 0.500263512134552, "learning_rate": 4.572210691829129e-05, "loss": 2.8028, "step": 1926 }, { "epoch": 1.1036432610407272, "grad_norm": 0.49632856249809265, "learning_rate": 4.562780692485035e-05, "loss": 2.9128, "step": 1928 }, { "epoch": 1.1047884904444922, "grad_norm": 0.524261474609375, "learning_rate": 4.553352260052319e-05, "loss": 2.8319, "step": 1930 }, { "epoch": 1.105933719848257, "grad_norm": 0.49589022994041443, "learning_rate": 4.5439254283207e-05, "loss": 2.8127, "step": 1932 }, { "epoch": 1.107078949252022, "grad_norm": 0.47969597578048706, "learning_rate": 4.534500231074165e-05, "loss": 2.8307, "step": 1934 }, { "epoch": 1.108224178655787, "grad_norm": 0.47272107005119324, "learning_rate": 4.525076702090838e-05, "loss": 2.8357, "step": 1936 }, { "epoch": 1.109369408059552, "grad_norm": 0.4718839228153229, "learning_rate": 4.515654875142866e-05, "loss": 2.8521, "step": 1938 }, { "epoch": 1.1105146374633168, "grad_norm": 0.493868350982666, "learning_rate": 4.506234783996297e-05, "loss": 2.877, "step": 1940 }, { "epoch": 1.1116598668670818, "grad_norm": 0.500551164150238, "learning_rate": 4.4968164624109545e-05, "loss": 2.8534, "step": 1942 }, { "epoch": 1.1128050962708467, "grad_norm": 0.47467198967933655, "learning_rate": 4.4873999441403284e-05, "loss": 2.8302, "step": 1944 }, { "epoch": 1.1139503256746117, "grad_norm": 0.494477778673172, "learning_rate": 4.477985262931437e-05, "loss": 2.8037, "step": 1946 }, { "epoch": 1.1150955550783765, "grad_norm": 0.514734148979187, "learning_rate": 4.4685724525247215e-05, "loss": 2.8516, "step": 1948 }, { "epoch": 1.1162407844821416, "grad_norm": 0.47042450308799744, "learning_rate": 4.459161546653913e-05, "loss": 2.816, "step": 1950 }, { "epoch": 1.1173860138859064, "grad_norm": 0.508755087852478, "learning_rate": 4.4497525790459204e-05, "loss": 2.8241, "step": 1952 }, { "epoch": 1.1185312432896715, "grad_norm": 0.470024049282074, "learning_rate": 4.440345583420707e-05, "loss": 2.8108, "step": 1954 }, { "epoch": 1.1196764726934365, "grad_norm": 0.4759051203727722, "learning_rate": 4.4309405934911674e-05, "loss": 2.8719, "step": 1956 }, { "epoch": 1.1208217020972013, "grad_norm": 0.45637574791908264, "learning_rate": 4.421537642963007e-05, "loss": 2.8487, "step": 1958 }, { "epoch": 1.1219669315009664, "grad_norm": 0.4891573488712311, "learning_rate": 4.412136765534624e-05, "loss": 2.8493, "step": 1960 }, { "epoch": 1.1231121609047312, "grad_norm": 0.5303109288215637, "learning_rate": 4.4027379948969846e-05, "loss": 2.8085, "step": 1962 }, { "epoch": 1.1242573903084963, "grad_norm": 0.5703256130218506, "learning_rate": 4.393341364733512e-05, "loss": 2.8547, "step": 1964 }, { "epoch": 1.125402619712261, "grad_norm": 0.6061555743217468, "learning_rate": 4.383946908719949e-05, "loss": 2.8631, "step": 1966 }, { "epoch": 1.1265478491160261, "grad_norm": 0.6273899674415588, "learning_rate": 4.374554660524253e-05, "loss": 2.8537, "step": 1968 }, { "epoch": 1.127693078519791, "grad_norm": 0.5794036984443665, "learning_rate": 4.365164653806464e-05, "loss": 2.8313, "step": 1970 }, { "epoch": 1.128838307923556, "grad_norm": 0.5450258255004883, "learning_rate": 4.355776922218593e-05, "loss": 2.8261, "step": 1972 }, { "epoch": 1.1299835373273208, "grad_norm": 0.5514605045318604, "learning_rate": 4.3463914994044976e-05, "loss": 2.8006, "step": 1974 }, { "epoch": 1.1311287667310859, "grad_norm": 0.4708465039730072, "learning_rate": 4.337008418999757e-05, "loss": 2.7886, "step": 1976 }, { "epoch": 1.1322739961348507, "grad_norm": 0.5147237777709961, "learning_rate": 4.3276277146315605e-05, "loss": 2.8763, "step": 1978 }, { "epoch": 1.1334192255386157, "grad_norm": 0.5111591219902039, "learning_rate": 4.318249419918579e-05, "loss": 2.8965, "step": 1980 }, { "epoch": 1.1345644549423806, "grad_norm": 0.45969030261039734, "learning_rate": 4.3088735684708457e-05, "loss": 2.7846, "step": 1982 }, { "epoch": 1.1357096843461456, "grad_norm": 0.47163280844688416, "learning_rate": 4.299500193889645e-05, "loss": 2.8163, "step": 1984 }, { "epoch": 1.1368549137499104, "grad_norm": 0.48009538650512695, "learning_rate": 4.2901293297673794e-05, "loss": 2.8405, "step": 1986 }, { "epoch": 1.1380001431536755, "grad_norm": 0.5266420841217041, "learning_rate": 4.2807610096874535e-05, "loss": 2.842, "step": 1988 }, { "epoch": 1.1391453725574405, "grad_norm": 0.4645231068134308, "learning_rate": 4.271395267224157e-05, "loss": 2.8676, "step": 1990 }, { "epoch": 1.1402906019612054, "grad_norm": 0.47144144773483276, "learning_rate": 4.262032135942539e-05, "loss": 2.863, "step": 1992 }, { "epoch": 1.1414358313649702, "grad_norm": 0.5167996287345886, "learning_rate": 4.252671649398296e-05, "loss": 2.897, "step": 1994 }, { "epoch": 1.1425810607687352, "grad_norm": 0.5122323632240295, "learning_rate": 4.243313841137642e-05, "loss": 2.8663, "step": 1996 }, { "epoch": 1.1437262901725003, "grad_norm": 0.5562819242477417, "learning_rate": 4.233958744697193e-05, "loss": 2.9046, "step": 1998 }, { "epoch": 1.144871519576265, "grad_norm": 0.5265432596206665, "learning_rate": 4.224606393603847e-05, "loss": 2.8505, "step": 2000 }, { "epoch": 1.1460167489800301, "grad_norm": 0.4736781120300293, "learning_rate": 4.2152568213746616e-05, "loss": 2.8337, "step": 2002 }, { "epoch": 1.147161978383795, "grad_norm": 0.49419066309928894, "learning_rate": 4.2059100615167394e-05, "loss": 2.8547, "step": 2004 }, { "epoch": 1.14830720778756, "grad_norm": 0.47105368971824646, "learning_rate": 4.1965661475271004e-05, "loss": 2.8748, "step": 2006 }, { "epoch": 1.1494524371913248, "grad_norm": 0.4889761507511139, "learning_rate": 4.187225112892567e-05, "loss": 2.8016, "step": 2008 }, { "epoch": 1.1505976665950899, "grad_norm": 0.4951672852039337, "learning_rate": 4.177886991089641e-05, "loss": 2.8293, "step": 2010 }, { "epoch": 1.1517428959988547, "grad_norm": 0.47535356879234314, "learning_rate": 4.168551815584385e-05, "loss": 2.8045, "step": 2012 }, { "epoch": 1.1528881254026198, "grad_norm": 0.4469471871852875, "learning_rate": 4.159219619832309e-05, "loss": 2.7951, "step": 2014 }, { "epoch": 1.1540333548063846, "grad_norm": 0.5131164193153381, "learning_rate": 4.149890437278235e-05, "loss": 2.9102, "step": 2016 }, { "epoch": 1.1551785842101496, "grad_norm": 0.5199970602989197, "learning_rate": 4.1405643013561926e-05, "loss": 2.8451, "step": 2018 }, { "epoch": 1.1563238136139145, "grad_norm": 0.4965759515762329, "learning_rate": 4.1312412454892885e-05, "loss": 2.8163, "step": 2020 }, { "epoch": 1.1574690430176795, "grad_norm": 0.4741258919239044, "learning_rate": 4.1219213030895936e-05, "loss": 2.8432, "step": 2022 }, { "epoch": 1.1586142724214443, "grad_norm": 0.46175336837768555, "learning_rate": 4.112604507558023e-05, "loss": 2.8825, "step": 2024 }, { "epoch": 1.1597595018252094, "grad_norm": 0.45644518733024597, "learning_rate": 4.1032908922842084e-05, "loss": 2.8631, "step": 2026 }, { "epoch": 1.1609047312289742, "grad_norm": 0.5027674436569214, "learning_rate": 4.093980490646388e-05, "loss": 2.8437, "step": 2028 }, { "epoch": 1.1620499606327392, "grad_norm": 0.5348462462425232, "learning_rate": 4.084673336011282e-05, "loss": 2.8361, "step": 2030 }, { "epoch": 1.1631951900365043, "grad_norm": 0.5205281972885132, "learning_rate": 4.075369461733971e-05, "loss": 2.8331, "step": 2032 }, { "epoch": 1.1643404194402691, "grad_norm": 0.5650776028633118, "learning_rate": 4.066068901157787e-05, "loss": 2.9023, "step": 2034 }, { "epoch": 1.1654856488440342, "grad_norm": 0.5416718125343323, "learning_rate": 4.05677168761418e-05, "loss": 2.8183, "step": 2036 }, { "epoch": 1.166630878247799, "grad_norm": 0.47712674736976624, "learning_rate": 4.047477854422606e-05, "loss": 2.8585, "step": 2038 }, { "epoch": 1.167776107651564, "grad_norm": 0.47490790486335754, "learning_rate": 4.038187434890407e-05, "loss": 2.7941, "step": 2040 }, { "epoch": 1.1689213370553289, "grad_norm": 0.49218589067459106, "learning_rate": 4.028900462312689e-05, "loss": 2.8257, "step": 2042 }, { "epoch": 1.170066566459094, "grad_norm": 0.4539277255535126, "learning_rate": 4.01961696997221e-05, "loss": 2.8154, "step": 2044 }, { "epoch": 1.1712117958628587, "grad_norm": 0.49758878350257874, "learning_rate": 4.010336991139252e-05, "loss": 2.8991, "step": 2046 }, { "epoch": 1.1723570252666238, "grad_norm": 0.508026123046875, "learning_rate": 4.0010605590715044e-05, "loss": 2.8909, "step": 2048 }, { "epoch": 1.1735022546703886, "grad_norm": 0.48496854305267334, "learning_rate": 3.991787707013947e-05, "loss": 2.7767, "step": 2050 }, { "epoch": 1.1746474840741536, "grad_norm": 0.5234657526016235, "learning_rate": 3.982518468198728e-05, "loss": 2.859, "step": 2052 }, { "epoch": 1.1757927134779185, "grad_norm": 0.5635547637939453, "learning_rate": 3.97325287584505e-05, "loss": 2.8283, "step": 2054 }, { "epoch": 1.1769379428816835, "grad_norm": 0.5184863805770874, "learning_rate": 3.963990963159045e-05, "loss": 2.8226, "step": 2056 }, { "epoch": 1.1780831722854483, "grad_norm": 0.5278655886650085, "learning_rate": 3.9547327633336564e-05, "loss": 2.8775, "step": 2058 }, { "epoch": 1.1792284016892134, "grad_norm": 0.4541829526424408, "learning_rate": 3.945478309548524e-05, "loss": 2.8208, "step": 2060 }, { "epoch": 1.1803736310929782, "grad_norm": 0.47832149267196655, "learning_rate": 3.936227634969858e-05, "loss": 2.8785, "step": 2062 }, { "epoch": 1.1815188604967433, "grad_norm": 0.46495962142944336, "learning_rate": 3.9269807727503324e-05, "loss": 2.9003, "step": 2064 }, { "epoch": 1.1826640899005083, "grad_norm": 0.46806037425994873, "learning_rate": 3.917737756028954e-05, "loss": 2.8832, "step": 2066 }, { "epoch": 1.1838093193042731, "grad_norm": 0.4699292480945587, "learning_rate": 3.9084986179309466e-05, "loss": 2.8043, "step": 2068 }, { "epoch": 1.184954548708038, "grad_norm": 0.5012586712837219, "learning_rate": 3.899263391567635e-05, "loss": 2.8403, "step": 2070 }, { "epoch": 1.186099778111803, "grad_norm": 0.4917641580104828, "learning_rate": 3.890032110036324e-05, "loss": 2.8293, "step": 2072 }, { "epoch": 1.187245007515568, "grad_norm": 0.49173396825790405, "learning_rate": 3.8808048064201855e-05, "loss": 2.8868, "step": 2074 }, { "epoch": 1.1883902369193329, "grad_norm": 0.4861849844455719, "learning_rate": 3.8715815137881304e-05, "loss": 2.7915, "step": 2076 }, { "epoch": 1.189535466323098, "grad_norm": 0.5474852323532104, "learning_rate": 3.862362265194697e-05, "loss": 2.9182, "step": 2078 }, { "epoch": 1.1906806957268627, "grad_norm": 0.5007859468460083, "learning_rate": 3.853147093679929e-05, "loss": 2.8388, "step": 2080 }, { "epoch": 1.1918259251306278, "grad_norm": 0.4659948945045471, "learning_rate": 3.8439360322692584e-05, "loss": 2.8256, "step": 2082 }, { "epoch": 1.1929711545343926, "grad_norm": 0.48769453167915344, "learning_rate": 3.8347291139733934e-05, "loss": 2.795, "step": 2084 }, { "epoch": 1.1941163839381577, "grad_norm": 0.4881047308444977, "learning_rate": 3.825526371788186e-05, "loss": 2.7846, "step": 2086 }, { "epoch": 1.1952616133419225, "grad_norm": 0.5094773173332214, "learning_rate": 3.8163278386945265e-05, "loss": 2.8577, "step": 2088 }, { "epoch": 1.1964068427456875, "grad_norm": 0.506121039390564, "learning_rate": 3.8071335476582185e-05, "loss": 2.7938, "step": 2090 }, { "epoch": 1.1975520721494524, "grad_norm": 0.5210023522377014, "learning_rate": 3.7979435316298616e-05, "loss": 2.8367, "step": 2092 }, { "epoch": 1.1986973015532174, "grad_norm": 0.5009431838989258, "learning_rate": 3.788757823544742e-05, "loss": 2.8387, "step": 2094 }, { "epoch": 1.1998425309569822, "grad_norm": 0.49134376645088196, "learning_rate": 3.779576456322698e-05, "loss": 2.8655, "step": 2096 }, { "epoch": 1.2009877603607473, "grad_norm": 0.47163623571395874, "learning_rate": 3.770399462868015e-05, "loss": 2.8167, "step": 2098 }, { "epoch": 1.2021329897645123, "grad_norm": 0.4546791613101959, "learning_rate": 3.7612268760693034e-05, "loss": 2.7889, "step": 2100 }, { "epoch": 1.2032782191682772, "grad_norm": 0.4658569395542145, "learning_rate": 3.752058728799382e-05, "loss": 2.8211, "step": 2102 }, { "epoch": 1.204423448572042, "grad_norm": 0.4912261962890625, "learning_rate": 3.742895053915156e-05, "loss": 2.8545, "step": 2104 }, { "epoch": 1.205568677975807, "grad_norm": 0.46961137652397156, "learning_rate": 3.7337358842575085e-05, "loss": 2.8495, "step": 2106 }, { "epoch": 1.206713907379572, "grad_norm": 0.48422500491142273, "learning_rate": 3.724581252651169e-05, "loss": 2.9101, "step": 2108 }, { "epoch": 1.207859136783337, "grad_norm": 0.46308696269989014, "learning_rate": 3.7154311919046084e-05, "loss": 2.8123, "step": 2110 }, { "epoch": 1.209004366187102, "grad_norm": 0.4869654178619385, "learning_rate": 3.706285734809914e-05, "loss": 2.8467, "step": 2112 }, { "epoch": 1.2101495955908668, "grad_norm": 0.4725172221660614, "learning_rate": 3.69714491414268e-05, "loss": 2.829, "step": 2114 }, { "epoch": 1.2112948249946318, "grad_norm": 0.4987369477748871, "learning_rate": 3.688008762661876e-05, "loss": 2.8589, "step": 2116 }, { "epoch": 1.2124400543983966, "grad_norm": 0.5197380185127258, "learning_rate": 3.678877313109745e-05, "loss": 2.892, "step": 2118 }, { "epoch": 1.2135852838021617, "grad_norm": 0.5172231793403625, "learning_rate": 3.669750598211676e-05, "loss": 2.8488, "step": 2120 }, { "epoch": 1.2147305132059265, "grad_norm": 0.48699328303337097, "learning_rate": 3.6606286506760904e-05, "loss": 2.8209, "step": 2122 }, { "epoch": 1.2158757426096916, "grad_norm": 0.4997568726539612, "learning_rate": 3.6515115031943245e-05, "loss": 2.8332, "step": 2124 }, { "epoch": 1.2170209720134564, "grad_norm": 0.4714409410953522, "learning_rate": 3.642399188440512e-05, "loss": 2.8782, "step": 2126 }, { "epoch": 1.2181662014172214, "grad_norm": 0.4618817865848541, "learning_rate": 3.633291739071466e-05, "loss": 2.8364, "step": 2128 }, { "epoch": 1.2193114308209863, "grad_norm": 0.46833354234695435, "learning_rate": 3.624189187726565e-05, "loss": 2.8518, "step": 2130 }, { "epoch": 1.2204566602247513, "grad_norm": 0.47965744137763977, "learning_rate": 3.615091567027635e-05, "loss": 2.7592, "step": 2132 }, { "epoch": 1.2216018896285161, "grad_norm": 0.4807797074317932, "learning_rate": 3.6059989095788266e-05, "loss": 2.7822, "step": 2134 }, { "epoch": 1.2227471190322812, "grad_norm": 0.4648941457271576, "learning_rate": 3.596911247966507e-05, "loss": 2.8691, "step": 2136 }, { "epoch": 1.223892348436046, "grad_norm": 0.4928225874900818, "learning_rate": 3.587828614759138e-05, "loss": 2.8773, "step": 2138 }, { "epoch": 1.225037577839811, "grad_norm": 0.4892253875732422, "learning_rate": 3.578751042507158e-05, "loss": 2.8158, "step": 2140 }, { "epoch": 1.226182807243576, "grad_norm": 0.5147957801818848, "learning_rate": 3.5696785637428755e-05, "loss": 2.8408, "step": 2142 }, { "epoch": 1.227328036647341, "grad_norm": 0.5246678590774536, "learning_rate": 3.560611210980337e-05, "loss": 2.8524, "step": 2144 }, { "epoch": 1.2284732660511057, "grad_norm": 0.47752901911735535, "learning_rate": 3.551549016715223e-05, "loss": 2.8031, "step": 2146 }, { "epoch": 1.2296184954548708, "grad_norm": 0.45269060134887695, "learning_rate": 3.542492013424724e-05, "loss": 2.8453, "step": 2148 }, { "epoch": 1.2307637248586358, "grad_norm": 0.5489292740821838, "learning_rate": 3.533440233567428e-05, "loss": 2.8513, "step": 2150 }, { "epoch": 1.2319089542624007, "grad_norm": 0.5441611409187317, "learning_rate": 3.524393709583206e-05, "loss": 2.8339, "step": 2152 }, { "epoch": 1.2330541836661657, "grad_norm": 0.47214022278785706, "learning_rate": 3.5153524738930876e-05, "loss": 2.841, "step": 2154 }, { "epoch": 1.2341994130699305, "grad_norm": 0.45212140679359436, "learning_rate": 3.506316558899154e-05, "loss": 2.7889, "step": 2156 }, { "epoch": 1.2353446424736956, "grad_norm": 0.46857187151908875, "learning_rate": 3.497285996984417e-05, "loss": 2.8283, "step": 2158 }, { "epoch": 1.2364898718774604, "grad_norm": 0.474598228931427, "learning_rate": 3.488260820512703e-05, "loss": 2.7821, "step": 2160 }, { "epoch": 1.2376351012812254, "grad_norm": 0.4739062190055847, "learning_rate": 3.479241061828542e-05, "loss": 2.8358, "step": 2162 }, { "epoch": 1.2387803306849903, "grad_norm": 0.45378774404525757, "learning_rate": 3.470226753257042e-05, "loss": 2.7641, "step": 2164 }, { "epoch": 1.2399255600887553, "grad_norm": 0.4402100145816803, "learning_rate": 3.4612179271037834e-05, "loss": 2.7888, "step": 2166 }, { "epoch": 1.2410707894925201, "grad_norm": 0.4304746985435486, "learning_rate": 3.452214615654696e-05, "loss": 2.8125, "step": 2168 }, { "epoch": 1.2422160188962852, "grad_norm": 0.4380868375301361, "learning_rate": 3.4432168511759436e-05, "loss": 2.8348, "step": 2170 }, { "epoch": 1.24336124830005, "grad_norm": 0.4232270121574402, "learning_rate": 3.434224665913819e-05, "loss": 2.8802, "step": 2172 }, { "epoch": 1.244506477703815, "grad_norm": 0.4585626423358917, "learning_rate": 3.425238092094612e-05, "loss": 2.8771, "step": 2174 }, { "epoch": 1.24565170710758, "grad_norm": 0.5192862153053284, "learning_rate": 3.416257161924508e-05, "loss": 2.8736, "step": 2176 }, { "epoch": 1.246796936511345, "grad_norm": 0.48126840591430664, "learning_rate": 3.407281907589459e-05, "loss": 2.841, "step": 2178 }, { "epoch": 1.2479421659151098, "grad_norm": 0.45204365253448486, "learning_rate": 3.398312361255083e-05, "loss": 2.794, "step": 2180 }, { "epoch": 1.2490873953188748, "grad_norm": 0.47659626603126526, "learning_rate": 3.389348555066539e-05, "loss": 2.7811, "step": 2182 }, { "epoch": 1.2502326247226399, "grad_norm": 0.4527016282081604, "learning_rate": 3.3803905211484154e-05, "loss": 2.8767, "step": 2184 }, { "epoch": 1.2513778541264047, "grad_norm": 0.41616103053092957, "learning_rate": 3.3714382916046116e-05, "loss": 2.8222, "step": 2186 }, { "epoch": 1.2525230835301695, "grad_norm": 0.4452345073223114, "learning_rate": 3.3624918985182266e-05, "loss": 2.9049, "step": 2188 }, { "epoch": 1.2536683129339345, "grad_norm": 0.4622010588645935, "learning_rate": 3.353551373951442e-05, "loss": 2.7793, "step": 2190 }, { "epoch": 1.2548135423376996, "grad_norm": 0.4598563611507416, "learning_rate": 3.3446167499454104e-05, "loss": 2.8283, "step": 2192 }, { "epoch": 1.2559587717414644, "grad_norm": 0.5354582071304321, "learning_rate": 3.3356880585201366e-05, "loss": 2.8343, "step": 2194 }, { "epoch": 1.2571040011452295, "grad_norm": 0.5059964656829834, "learning_rate": 3.3267653316743605e-05, "loss": 2.8377, "step": 2196 }, { "epoch": 1.2582492305489943, "grad_norm": 0.4470852017402649, "learning_rate": 3.3178486013854514e-05, "loss": 2.8321, "step": 2198 }, { "epoch": 1.2593944599527593, "grad_norm": 0.45256906747817993, "learning_rate": 3.3089378996092825e-05, "loss": 2.7911, "step": 2200 }, { "epoch": 1.2605396893565242, "grad_norm": 0.46151721477508545, "learning_rate": 3.300033258280129e-05, "loss": 2.906, "step": 2202 }, { "epoch": 1.2616849187602892, "grad_norm": 0.47998738288879395, "learning_rate": 3.291134709310541e-05, "loss": 2.8667, "step": 2204 }, { "epoch": 1.262830148164054, "grad_norm": 0.4396640956401825, "learning_rate": 3.282242284591236e-05, "loss": 2.853, "step": 2206 }, { "epoch": 1.263975377567819, "grad_norm": 0.43934157490730286, "learning_rate": 3.273356015990985e-05, "loss": 2.7609, "step": 2208 }, { "epoch": 1.2651206069715841, "grad_norm": 0.4398691952228546, "learning_rate": 3.2644759353564926e-05, "loss": 2.8223, "step": 2210 }, { "epoch": 1.266265836375349, "grad_norm": 0.4410426616668701, "learning_rate": 3.2556020745122937e-05, "loss": 2.7372, "step": 2212 }, { "epoch": 1.2674110657791138, "grad_norm": 0.4336583912372589, "learning_rate": 3.246734465260628e-05, "loss": 2.9, "step": 2214 }, { "epoch": 1.2685562951828788, "grad_norm": 0.4740573763847351, "learning_rate": 3.237873139381329e-05, "loss": 2.8364, "step": 2216 }, { "epoch": 1.2697015245866439, "grad_norm": 0.48148828744888306, "learning_rate": 3.2290181286317166e-05, "loss": 2.8649, "step": 2218 }, { "epoch": 1.2708467539904087, "grad_norm": 0.4757157564163208, "learning_rate": 3.220169464746472e-05, "loss": 2.8397, "step": 2220 }, { "epoch": 1.2719919833941735, "grad_norm": 0.4561164975166321, "learning_rate": 3.21132717943754e-05, "loss": 2.7837, "step": 2222 }, { "epoch": 1.2731372127979386, "grad_norm": 0.44345030188560486, "learning_rate": 3.202491304393998e-05, "loss": 2.8314, "step": 2224 }, { "epoch": 1.2742824422017036, "grad_norm": 0.45661360025405884, "learning_rate": 3.193661871281951e-05, "loss": 2.9171, "step": 2226 }, { "epoch": 1.2754276716054684, "grad_norm": 0.49097496271133423, "learning_rate": 3.18483891174442e-05, "loss": 2.7819, "step": 2228 }, { "epoch": 1.2765729010092335, "grad_norm": 0.44910290837287903, "learning_rate": 3.176022457401224e-05, "loss": 2.8091, "step": 2230 }, { "epoch": 1.2777181304129983, "grad_norm": 0.43449267745018005, "learning_rate": 3.167212539848871e-05, "loss": 2.8353, "step": 2232 }, { "epoch": 1.2788633598167634, "grad_norm": 0.42733684182167053, "learning_rate": 3.15840919066044e-05, "loss": 2.787, "step": 2234 }, { "epoch": 1.2800085892205282, "grad_norm": 0.45791661739349365, "learning_rate": 3.1496124413854705e-05, "loss": 2.82, "step": 2236 }, { "epoch": 1.2811538186242932, "grad_norm": 0.44692790508270264, "learning_rate": 3.1408223235498495e-05, "loss": 2.8223, "step": 2238 }, { "epoch": 1.282299048028058, "grad_norm": 0.4272943139076233, "learning_rate": 3.1320388686556965e-05, "loss": 2.8738, "step": 2240 }, { "epoch": 1.283444277431823, "grad_norm": 0.4420345425605774, "learning_rate": 3.123262108181259e-05, "loss": 2.7926, "step": 2242 }, { "epoch": 1.284589506835588, "grad_norm": 0.4313000440597534, "learning_rate": 3.114492073580785e-05, "loss": 2.831, "step": 2244 }, { "epoch": 1.285734736239353, "grad_norm": 0.43327853083610535, "learning_rate": 3.105728796284421e-05, "loss": 2.7826, "step": 2246 }, { "epoch": 1.2868799656431178, "grad_norm": 0.4268853962421417, "learning_rate": 3.096972307698097e-05, "loss": 2.8189, "step": 2248 }, { "epoch": 1.2880251950468828, "grad_norm": 0.4504855275154114, "learning_rate": 3.088222639203413e-05, "loss": 2.8535, "step": 2250 }, { "epoch": 1.289170424450648, "grad_norm": 0.4626159965991974, "learning_rate": 3.0794798221575294e-05, "loss": 2.8214, "step": 2252 }, { "epoch": 1.2903156538544127, "grad_norm": 0.44871020317077637, "learning_rate": 3.0707438878930494e-05, "loss": 2.804, "step": 2254 }, { "epoch": 1.2914608832581775, "grad_norm": 0.4318138659000397, "learning_rate": 3.06201486771791e-05, "loss": 2.8279, "step": 2256 }, { "epoch": 1.2926061126619426, "grad_norm": 0.43403252959251404, "learning_rate": 3.0532927929152704e-05, "loss": 2.8343, "step": 2258 }, { "epoch": 1.2937513420657076, "grad_norm": 0.4334167540073395, "learning_rate": 3.0445776947433968e-05, "loss": 2.8673, "step": 2260 }, { "epoch": 1.2948965714694725, "grad_norm": 0.476735919713974, "learning_rate": 3.0358696044355572e-05, "loss": 2.8517, "step": 2262 }, { "epoch": 1.2960418008732373, "grad_norm": 0.4638877511024475, "learning_rate": 3.0271685531999018e-05, "loss": 2.8453, "step": 2264 }, { "epoch": 1.2971870302770023, "grad_norm": 0.4613761007785797, "learning_rate": 3.0184745722193514e-05, "loss": 2.7953, "step": 2266 }, { "epoch": 1.2983322596807674, "grad_norm": 0.46434667706489563, "learning_rate": 3.0097876926514924e-05, "loss": 2.822, "step": 2268 }, { "epoch": 1.2994774890845322, "grad_norm": 0.44353625178337097, "learning_rate": 3.001107945628458e-05, "loss": 2.8652, "step": 2270 }, { "epoch": 1.3006227184882972, "grad_norm": 0.4787918031215668, "learning_rate": 2.9924353622568246e-05, "loss": 2.8832, "step": 2272 }, { "epoch": 1.301767947892062, "grad_norm": 0.45185986161231995, "learning_rate": 2.9837699736174903e-05, "loss": 2.7848, "step": 2274 }, { "epoch": 1.3029131772958271, "grad_norm": 0.4329104423522949, "learning_rate": 2.9751118107655705e-05, "loss": 2.8175, "step": 2276 }, { "epoch": 1.304058406699592, "grad_norm": 0.48424869775772095, "learning_rate": 2.9664609047302848e-05, "loss": 2.8225, "step": 2278 }, { "epoch": 1.305203636103357, "grad_norm": 0.44777345657348633, "learning_rate": 2.9578172865148452e-05, "loss": 2.8574, "step": 2280 }, { "epoch": 1.3063488655071218, "grad_norm": 0.4826993942260742, "learning_rate": 2.949180987096347e-05, "loss": 2.8948, "step": 2282 }, { "epoch": 1.3074940949108869, "grad_norm": 0.49016353487968445, "learning_rate": 2.9405520374256557e-05, "loss": 2.8077, "step": 2284 }, { "epoch": 1.308639324314652, "grad_norm": 0.4361143708229065, "learning_rate": 2.931930468427295e-05, "loss": 2.8051, "step": 2286 }, { "epoch": 1.3097845537184167, "grad_norm": 0.45759907364845276, "learning_rate": 2.92331631099934e-05, "loss": 2.7944, "step": 2288 }, { "epoch": 1.3109297831221816, "grad_norm": 0.42557355761528015, "learning_rate": 2.9147095960133e-05, "loss": 2.8121, "step": 2290 }, { "epoch": 1.3120750125259466, "grad_norm": 0.446831613779068, "learning_rate": 2.90611035431402e-05, "loss": 2.8345, "step": 2292 }, { "epoch": 1.3132202419297117, "grad_norm": 0.43625926971435547, "learning_rate": 2.8975186167195557e-05, "loss": 2.8493, "step": 2294 }, { "epoch": 1.3143654713334765, "grad_norm": 0.4267670810222626, "learning_rate": 2.8889344140210707e-05, "loss": 2.8082, "step": 2296 }, { "epoch": 1.3155107007372413, "grad_norm": 0.44582757353782654, "learning_rate": 2.8803577769827222e-05, "loss": 2.809, "step": 2298 }, { "epoch": 1.3166559301410063, "grad_norm": 0.4613340198993683, "learning_rate": 2.871788736341562e-05, "loss": 2.8588, "step": 2300 }, { "epoch": 1.3178011595447714, "grad_norm": 0.4537021517753601, "learning_rate": 2.8632273228074104e-05, "loss": 2.8606, "step": 2302 }, { "epoch": 1.3189463889485362, "grad_norm": 0.48666879534721375, "learning_rate": 2.8546735670627546e-05, "loss": 2.8148, "step": 2304 }, { "epoch": 1.3200916183523013, "grad_norm": 0.4481780529022217, "learning_rate": 2.8461274997626403e-05, "loss": 2.8373, "step": 2306 }, { "epoch": 1.321236847756066, "grad_norm": 0.4139377176761627, "learning_rate": 2.837589151534557e-05, "loss": 2.7944, "step": 2308 }, { "epoch": 1.3223820771598311, "grad_norm": 0.4229056239128113, "learning_rate": 2.829058552978329e-05, "loss": 2.8326, "step": 2310 }, { "epoch": 1.323527306563596, "grad_norm": 0.41402408480644226, "learning_rate": 2.8205357346660145e-05, "loss": 2.8933, "step": 2312 }, { "epoch": 1.324672535967361, "grad_norm": 0.44987326860427856, "learning_rate": 2.8120207271417808e-05, "loss": 2.8182, "step": 2314 }, { "epoch": 1.3258177653711258, "grad_norm": 0.4324330985546112, "learning_rate": 2.8035135609218067e-05, "loss": 2.816, "step": 2316 }, { "epoch": 1.3269629947748909, "grad_norm": 0.425658255815506, "learning_rate": 2.795014266494167e-05, "loss": 2.8994, "step": 2318 }, { "epoch": 1.3281082241786557, "grad_norm": 0.4927752614021301, "learning_rate": 2.7865228743187283e-05, "loss": 2.8343, "step": 2320 }, { "epoch": 1.3292534535824208, "grad_norm": 0.4771329164505005, "learning_rate": 2.778039414827035e-05, "loss": 2.8507, "step": 2322 }, { "epoch": 1.3303986829861856, "grad_norm": 0.443246066570282, "learning_rate": 2.7695639184222022e-05, "loss": 2.8005, "step": 2324 }, { "epoch": 1.3315439123899506, "grad_norm": 0.4310682713985443, "learning_rate": 2.7610964154788087e-05, "loss": 2.8282, "step": 2326 }, { "epoch": 1.3326891417937157, "grad_norm": 0.4329923093318939, "learning_rate": 2.7526369363427824e-05, "loss": 2.8712, "step": 2328 }, { "epoch": 1.3338343711974805, "grad_norm": 0.44394516944885254, "learning_rate": 2.744185511331302e-05, "loss": 2.8028, "step": 2330 }, { "epoch": 1.3349796006012453, "grad_norm": 0.4438163936138153, "learning_rate": 2.7357421707326746e-05, "loss": 2.8461, "step": 2332 }, { "epoch": 1.3361248300050104, "grad_norm": 0.4866315722465515, "learning_rate": 2.7273069448062394e-05, "loss": 2.7808, "step": 2334 }, { "epoch": 1.3372700594087754, "grad_norm": 0.4678270220756531, "learning_rate": 2.718879863782251e-05, "loss": 2.7581, "step": 2336 }, { "epoch": 1.3384152888125402, "grad_norm": 0.4453393816947937, "learning_rate": 2.7104609578617733e-05, "loss": 2.7943, "step": 2338 }, { "epoch": 1.3395605182163053, "grad_norm": 0.4810028374195099, "learning_rate": 2.7020502572165784e-05, "loss": 2.7861, "step": 2340 }, { "epoch": 1.34070574762007, "grad_norm": 0.4751345217227936, "learning_rate": 2.6936477919890258e-05, "loss": 2.8862, "step": 2342 }, { "epoch": 1.3418509770238352, "grad_norm": 0.4226936101913452, "learning_rate": 2.6852535922919642e-05, "loss": 2.8343, "step": 2344 }, { "epoch": 1.3429962064276, "grad_norm": 0.4143749177455902, "learning_rate": 2.6768676882086174e-05, "loss": 2.8712, "step": 2346 }, { "epoch": 1.344141435831365, "grad_norm": 0.45184242725372314, "learning_rate": 2.6684901097924803e-05, "loss": 2.8036, "step": 2348 }, { "epoch": 1.3452866652351299, "grad_norm": 0.4601394832134247, "learning_rate": 2.6601208870672147e-05, "loss": 2.7984, "step": 2350 }, { "epoch": 1.346431894638895, "grad_norm": 0.426993191242218, "learning_rate": 2.6517600500265304e-05, "loss": 2.7911, "step": 2352 }, { "epoch": 1.3475771240426597, "grad_norm": 0.4215984046459198, "learning_rate": 2.643407628634088e-05, "loss": 2.829, "step": 2354 }, { "epoch": 1.3487223534464248, "grad_norm": 0.43159547448158264, "learning_rate": 2.6350636528233886e-05, "loss": 2.8665, "step": 2356 }, { "epoch": 1.3498675828501896, "grad_norm": 0.4220997989177704, "learning_rate": 2.6267281524976607e-05, "loss": 2.8259, "step": 2358 }, { "epoch": 1.3510128122539546, "grad_norm": 0.4615905284881592, "learning_rate": 2.618401157529768e-05, "loss": 2.8137, "step": 2360 }, { "epoch": 1.3521580416577197, "grad_norm": 0.4787854552268982, "learning_rate": 2.610082697762084e-05, "loss": 2.8423, "step": 2362 }, { "epoch": 1.3533032710614845, "grad_norm": 0.4455800950527191, "learning_rate": 2.601772803006397e-05, "loss": 2.8511, "step": 2364 }, { "epoch": 1.3544485004652493, "grad_norm": 0.42842209339141846, "learning_rate": 2.5934715030437995e-05, "loss": 2.8418, "step": 2366 }, { "epoch": 1.3555937298690144, "grad_norm": 0.4433073103427887, "learning_rate": 2.5851788276245816e-05, "loss": 2.8793, "step": 2368 }, { "epoch": 1.3567389592727794, "grad_norm": 0.4123075008392334, "learning_rate": 2.5768948064681246e-05, "loss": 2.8262, "step": 2370 }, { "epoch": 1.3578841886765443, "grad_norm": 0.4401339590549469, "learning_rate": 2.5686194692627964e-05, "loss": 2.8251, "step": 2372 }, { "epoch": 1.359029418080309, "grad_norm": 0.4036884903907776, "learning_rate": 2.56035284566584e-05, "loss": 2.8509, "step": 2374 }, { "epoch": 1.3601746474840741, "grad_norm": 0.4059266149997711, "learning_rate": 2.5520949653032738e-05, "loss": 2.8484, "step": 2376 }, { "epoch": 1.3613198768878392, "grad_norm": 0.42286744713783264, "learning_rate": 2.5438458577697777e-05, "loss": 2.7749, "step": 2378 }, { "epoch": 1.362465106291604, "grad_norm": 0.4564734697341919, "learning_rate": 2.5356055526285988e-05, "loss": 2.8331, "step": 2380 }, { "epoch": 1.363610335695369, "grad_norm": 0.4251658618450165, "learning_rate": 2.5273740794114307e-05, "loss": 2.766, "step": 2382 }, { "epoch": 1.3647555650991339, "grad_norm": 0.44014859199523926, "learning_rate": 2.5191514676183192e-05, "loss": 2.7856, "step": 2384 }, { "epoch": 1.365900794502899, "grad_norm": 0.43395334482192993, "learning_rate": 2.51093774671755e-05, "loss": 2.798, "step": 2386 }, { "epoch": 1.3670460239066637, "grad_norm": 0.43457454442977905, "learning_rate": 2.502732946145545e-05, "loss": 2.7975, "step": 2388 }, { "epoch": 1.3681912533104288, "grad_norm": 0.43714475631713867, "learning_rate": 2.4945370953067633e-05, "loss": 2.8554, "step": 2390 }, { "epoch": 1.3693364827141936, "grad_norm": 0.4291505217552185, "learning_rate": 2.4863502235735838e-05, "loss": 2.7629, "step": 2392 }, { "epoch": 1.3704817121179587, "grad_norm": 0.42168352007865906, "learning_rate": 2.4781723602862066e-05, "loss": 2.8164, "step": 2394 }, { "epoch": 1.3716269415217237, "grad_norm": 0.4350284934043884, "learning_rate": 2.4700035347525495e-05, "loss": 2.8473, "step": 2396 }, { "epoch": 1.3727721709254885, "grad_norm": 0.3933637738227844, "learning_rate": 2.461843776248138e-05, "loss": 2.7674, "step": 2398 }, { "epoch": 1.3739174003292534, "grad_norm": 0.4060092270374298, "learning_rate": 2.453693114016007e-05, "loss": 2.8054, "step": 2400 }, { "epoch": 1.3750626297330184, "grad_norm": 0.42997944355010986, "learning_rate": 2.4455515772665888e-05, "loss": 2.8384, "step": 2402 }, { "epoch": 1.3762078591367835, "grad_norm": 0.4723321795463562, "learning_rate": 2.4374191951776127e-05, "loss": 2.8137, "step": 2404 }, { "epoch": 1.3773530885405483, "grad_norm": 0.43630415201187134, "learning_rate": 2.4292959968939993e-05, "loss": 2.7938, "step": 2406 }, { "epoch": 1.378498317944313, "grad_norm": 0.4638310372829437, "learning_rate": 2.421182011527754e-05, "loss": 2.8042, "step": 2408 }, { "epoch": 1.3796435473480781, "grad_norm": 0.455529123544693, "learning_rate": 2.4130772681578705e-05, "loss": 2.8098, "step": 2410 }, { "epoch": 1.3807887767518432, "grad_norm": 0.4631556570529938, "learning_rate": 2.4049817958302168e-05, "loss": 2.7336, "step": 2412 }, { "epoch": 1.381934006155608, "grad_norm": 0.4377457797527313, "learning_rate": 2.3968956235574354e-05, "loss": 2.7679, "step": 2414 }, { "epoch": 1.383079235559373, "grad_norm": 0.40758460760116577, "learning_rate": 2.3888187803188388e-05, "loss": 2.8313, "step": 2416 }, { "epoch": 1.384224464963138, "grad_norm": 0.41741126775741577, "learning_rate": 2.3807512950603082e-05, "loss": 2.8881, "step": 2418 }, { "epoch": 1.385369694366903, "grad_norm": 0.43378809094429016, "learning_rate": 2.372693196694186e-05, "loss": 2.8, "step": 2420 }, { "epoch": 1.3865149237706678, "grad_norm": 0.44759225845336914, "learning_rate": 2.3646445140991742e-05, "loss": 2.7743, "step": 2422 }, { "epoch": 1.3876601531744328, "grad_norm": 0.424991250038147, "learning_rate": 2.3566052761202296e-05, "loss": 2.8547, "step": 2424 }, { "epoch": 1.3888053825781976, "grad_norm": 0.41585108637809753, "learning_rate": 2.348575511568462e-05, "loss": 2.8035, "step": 2426 }, { "epoch": 1.3899506119819627, "grad_norm": 0.44614076614379883, "learning_rate": 2.3405552492210287e-05, "loss": 2.8011, "step": 2428 }, { "epoch": 1.3910958413857275, "grad_norm": 0.4364294409751892, "learning_rate": 2.332544517821038e-05, "loss": 2.7666, "step": 2430 }, { "epoch": 1.3922410707894926, "grad_norm": 0.44953617453575134, "learning_rate": 2.3245433460774363e-05, "loss": 2.7605, "step": 2432 }, { "epoch": 1.3933863001932574, "grad_norm": 0.44581037759780884, "learning_rate": 2.3165517626649103e-05, "loss": 2.8071, "step": 2434 }, { "epoch": 1.3945315295970224, "grad_norm": 0.38801848888397217, "learning_rate": 2.3085697962237844e-05, "loss": 2.7572, "step": 2436 }, { "epoch": 1.3956767590007875, "grad_norm": 0.41832783818244934, "learning_rate": 2.3005974753599174e-05, "loss": 2.8419, "step": 2438 }, { "epoch": 1.3968219884045523, "grad_norm": 0.47063156962394714, "learning_rate": 2.2926348286446037e-05, "loss": 2.7487, "step": 2440 }, { "epoch": 1.3979672178083171, "grad_norm": 0.41708147525787354, "learning_rate": 2.284681884614463e-05, "loss": 2.8393, "step": 2442 }, { "epoch": 1.3991124472120822, "grad_norm": 0.4102913737297058, "learning_rate": 2.2767386717713435e-05, "loss": 2.8448, "step": 2444 }, { "epoch": 1.4002576766158472, "grad_norm": 0.41126230359077454, "learning_rate": 2.2688052185822196e-05, "loss": 2.7864, "step": 2446 }, { "epoch": 1.401402906019612, "grad_norm": 0.4447810649871826, "learning_rate": 2.2608815534790857e-05, "loss": 2.8258, "step": 2448 }, { "epoch": 1.4025481354233769, "grad_norm": 0.477117657661438, "learning_rate": 2.2529677048588638e-05, "loss": 2.7788, "step": 2450 }, { "epoch": 1.403693364827142, "grad_norm": 0.4645892083644867, "learning_rate": 2.2450637010832914e-05, "loss": 2.8313, "step": 2452 }, { "epoch": 1.404838594230907, "grad_norm": 0.40552330017089844, "learning_rate": 2.2371695704788232e-05, "loss": 2.752, "step": 2454 }, { "epoch": 1.4059838236346718, "grad_norm": 0.4056200385093689, "learning_rate": 2.2292853413365322e-05, "loss": 2.8012, "step": 2456 }, { "epoch": 1.4071290530384368, "grad_norm": 0.47695791721343994, "learning_rate": 2.221411041912004e-05, "loss": 2.7706, "step": 2458 }, { "epoch": 1.4082742824422017, "grad_norm": 0.48394089937210083, "learning_rate": 2.2135467004252425e-05, "loss": 2.855, "step": 2460 }, { "epoch": 1.4094195118459667, "grad_norm": 0.43482139706611633, "learning_rate": 2.2056923450605606e-05, "loss": 2.8092, "step": 2462 }, { "epoch": 1.4105647412497315, "grad_norm": 0.4180615246295929, "learning_rate": 2.1978480039664833e-05, "loss": 2.7507, "step": 2464 }, { "epoch": 1.4117099706534966, "grad_norm": 0.44258221983909607, "learning_rate": 2.1900137052556475e-05, "loss": 2.8374, "step": 2466 }, { "epoch": 1.4128552000572614, "grad_norm": 0.5226263403892517, "learning_rate": 2.182189477004698e-05, "loss": 2.8863, "step": 2468 }, { "epoch": 1.4140004294610264, "grad_norm": 0.46159401535987854, "learning_rate": 2.1743753472541915e-05, "loss": 2.8606, "step": 2470 }, { "epoch": 1.4151456588647915, "grad_norm": 0.416216641664505, "learning_rate": 2.1665713440084923e-05, "loss": 2.7817, "step": 2472 }, { "epoch": 1.4162908882685563, "grad_norm": 0.4066028296947479, "learning_rate": 2.1587774952356726e-05, "loss": 2.7824, "step": 2474 }, { "epoch": 1.4174361176723211, "grad_norm": 0.4143048822879791, "learning_rate": 2.150993828867414e-05, "loss": 2.7889, "step": 2476 }, { "epoch": 1.4185813470760862, "grad_norm": 0.4550580382347107, "learning_rate": 2.1432203727989042e-05, "loss": 2.8108, "step": 2478 }, { "epoch": 1.4197265764798512, "grad_norm": 0.4116652011871338, "learning_rate": 2.1354571548887448e-05, "loss": 2.7389, "step": 2480 }, { "epoch": 1.420871805883616, "grad_norm": 0.4498264491558075, "learning_rate": 2.1277042029588397e-05, "loss": 2.8583, "step": 2482 }, { "epoch": 1.4220170352873809, "grad_norm": 0.4109775722026825, "learning_rate": 2.1199615447943027e-05, "loss": 2.7984, "step": 2484 }, { "epoch": 1.423162264691146, "grad_norm": 0.4187685549259186, "learning_rate": 2.112229208143357e-05, "loss": 2.7972, "step": 2486 }, { "epoch": 1.424307494094911, "grad_norm": 0.41403427720069885, "learning_rate": 2.1045072207172397e-05, "loss": 2.7937, "step": 2488 }, { "epoch": 1.4254527234986758, "grad_norm": 0.4104973375797272, "learning_rate": 2.096795610190091e-05, "loss": 2.7791, "step": 2490 }, { "epoch": 1.4265979529024408, "grad_norm": 0.40087735652923584, "learning_rate": 2.089094404198868e-05, "loss": 2.8392, "step": 2492 }, { "epoch": 1.4277431823062057, "grad_norm": 0.3889038562774658, "learning_rate": 2.081403630343236e-05, "loss": 2.8462, "step": 2494 }, { "epoch": 1.4288884117099707, "grad_norm": 0.4070858955383301, "learning_rate": 2.0737233161854747e-05, "loss": 2.8283, "step": 2496 }, { "epoch": 1.4300336411137355, "grad_norm": 0.41707080602645874, "learning_rate": 2.0660534892503813e-05, "loss": 2.8241, "step": 2498 }, { "epoch": 1.4311788705175006, "grad_norm": 0.40596872568130493, "learning_rate": 2.0583941770251647e-05, "loss": 2.7743, "step": 2500 }, { "epoch": 1.4323240999212654, "grad_norm": 0.41656824946403503, "learning_rate": 2.050745406959353e-05, "loss": 2.8279, "step": 2502 }, { "epoch": 1.4334693293250305, "grad_norm": 0.4031243622303009, "learning_rate": 2.043107206464692e-05, "loss": 2.793, "step": 2504 }, { "epoch": 1.4346145587287953, "grad_norm": 0.39451080560684204, "learning_rate": 2.035479602915048e-05, "loss": 2.8211, "step": 2506 }, { "epoch": 1.4357597881325603, "grad_norm": 0.40356582403182983, "learning_rate": 2.0278626236463132e-05, "loss": 2.8192, "step": 2508 }, { "epoch": 1.4369050175363252, "grad_norm": 0.40355321764945984, "learning_rate": 2.0202562959563014e-05, "loss": 2.7673, "step": 2510 }, { "epoch": 1.4380502469400902, "grad_norm": 0.38622865080833435, "learning_rate": 2.0126606471046533e-05, "loss": 2.7961, "step": 2512 }, { "epoch": 1.4391954763438553, "grad_norm": 0.41536667943000793, "learning_rate": 2.0050757043127384e-05, "loss": 2.874, "step": 2514 }, { "epoch": 1.44034070574762, "grad_norm": 0.43348467350006104, "learning_rate": 1.997501494763559e-05, "loss": 2.8313, "step": 2516 }, { "epoch": 1.441485935151385, "grad_norm": 0.4242289066314697, "learning_rate": 1.9899380456016508e-05, "loss": 2.7851, "step": 2518 }, { "epoch": 1.44263116455515, "grad_norm": 0.36732569336891174, "learning_rate": 1.982385383932986e-05, "loss": 2.7494, "step": 2520 }, { "epoch": 1.443776393958915, "grad_norm": 0.4104781150817871, "learning_rate": 1.9748435368248774e-05, "loss": 2.7358, "step": 2522 }, { "epoch": 1.4449216233626798, "grad_norm": 0.4690151810646057, "learning_rate": 1.9673125313058795e-05, "loss": 2.8255, "step": 2524 }, { "epoch": 1.4460668527664446, "grad_norm": 0.4592035710811615, "learning_rate": 1.959792394365691e-05, "loss": 2.8087, "step": 2526 }, { "epoch": 1.4472120821702097, "grad_norm": 0.4037526249885559, "learning_rate": 1.9522831529550657e-05, "loss": 2.8022, "step": 2528 }, { "epoch": 1.4483573115739747, "grad_norm": 0.4069378077983856, "learning_rate": 1.944784833985704e-05, "loss": 2.7445, "step": 2530 }, { "epoch": 1.4495025409777396, "grad_norm": 0.4676271975040436, "learning_rate": 1.9372974643301633e-05, "loss": 2.8313, "step": 2532 }, { "epoch": 1.4506477703815046, "grad_norm": 0.4223306179046631, "learning_rate": 1.929821070821763e-05, "loss": 2.8118, "step": 2534 }, { "epoch": 1.4517929997852694, "grad_norm": 0.3974757492542267, "learning_rate": 1.9223556802544818e-05, "loss": 2.8295, "step": 2536 }, { "epoch": 1.4529382291890345, "grad_norm": 0.43295469880104065, "learning_rate": 1.9149013193828726e-05, "loss": 2.8666, "step": 2538 }, { "epoch": 1.4540834585927993, "grad_norm": 0.40494778752326965, "learning_rate": 1.907458014921954e-05, "loss": 2.8302, "step": 2540 }, { "epoch": 1.4552286879965644, "grad_norm": 0.42885714769363403, "learning_rate": 1.9000257935471234e-05, "loss": 2.7867, "step": 2542 }, { "epoch": 1.4563739174003292, "grad_norm": 0.4137801229953766, "learning_rate": 1.8926046818940575e-05, "loss": 2.7869, "step": 2544 }, { "epoch": 1.4575191468040942, "grad_norm": 0.4357258081436157, "learning_rate": 1.885194706558616e-05, "loss": 2.7846, "step": 2546 }, { "epoch": 1.4586643762078593, "grad_norm": 0.39982470870018005, "learning_rate": 1.8777958940967543e-05, "loss": 2.8071, "step": 2548 }, { "epoch": 1.459809605611624, "grad_norm": 0.41449084877967834, "learning_rate": 1.8704082710244164e-05, "loss": 2.8224, "step": 2550 }, { "epoch": 1.460954835015389, "grad_norm": 0.3891531825065613, "learning_rate": 1.8630318638174466e-05, "loss": 2.8059, "step": 2552 }, { "epoch": 1.462100064419154, "grad_norm": 0.399565726518631, "learning_rate": 1.855666698911495e-05, "loss": 2.7972, "step": 2554 }, { "epoch": 1.463245293822919, "grad_norm": 0.4340634346008301, "learning_rate": 1.848312802701919e-05, "loss": 2.8914, "step": 2556 }, { "epoch": 1.4643905232266838, "grad_norm": 0.3744601011276245, "learning_rate": 1.8409702015436975e-05, "loss": 2.7478, "step": 2558 }, { "epoch": 1.4655357526304487, "grad_norm": 0.40655162930488586, "learning_rate": 1.8336389217513215e-05, "loss": 2.7858, "step": 2560 }, { "epoch": 1.4666809820342137, "grad_norm": 0.41393396258354187, "learning_rate": 1.8263189895987147e-05, "loss": 2.8087, "step": 2562 }, { "epoch": 1.4678262114379788, "grad_norm": 0.40923526883125305, "learning_rate": 1.819010431319129e-05, "loss": 2.781, "step": 2564 }, { "epoch": 1.4689714408417436, "grad_norm": 0.4070257544517517, "learning_rate": 1.811713273105058e-05, "loss": 2.8785, "step": 2566 }, { "epoch": 1.4701166702455086, "grad_norm": 0.4043235182762146, "learning_rate": 1.8044275411081357e-05, "loss": 2.777, "step": 2568 }, { "epoch": 1.4712618996492735, "grad_norm": 0.4282217025756836, "learning_rate": 1.7971532614390506e-05, "loss": 2.842, "step": 2570 }, { "epoch": 1.4724071290530385, "grad_norm": 0.3940590023994446, "learning_rate": 1.7898904601674455e-05, "loss": 2.7996, "step": 2572 }, { "epoch": 1.4735523584568033, "grad_norm": 0.39545518159866333, "learning_rate": 1.7826391633218282e-05, "loss": 2.8039, "step": 2574 }, { "epoch": 1.4746975878605684, "grad_norm": 0.395147442817688, "learning_rate": 1.7753993968894738e-05, "loss": 2.753, "step": 2576 }, { "epoch": 1.4758428172643332, "grad_norm": 0.384915828704834, "learning_rate": 1.7681711868163415e-05, "loss": 2.8487, "step": 2578 }, { "epoch": 1.4769880466680982, "grad_norm": 0.42302560806274414, "learning_rate": 1.760954559006968e-05, "loss": 2.8235, "step": 2580 }, { "epoch": 1.478133276071863, "grad_norm": 0.39595770835876465, "learning_rate": 1.753749539324384e-05, "loss": 2.7669, "step": 2582 }, { "epoch": 1.4792785054756281, "grad_norm": 0.39236024022102356, "learning_rate": 1.746556153590019e-05, "loss": 2.8432, "step": 2584 }, { "epoch": 1.480423734879393, "grad_norm": 0.42855557799339294, "learning_rate": 1.7393744275836055e-05, "loss": 2.7911, "step": 2586 }, { "epoch": 1.481568964283158, "grad_norm": 0.4045625329017639, "learning_rate": 1.732204387043096e-05, "loss": 2.7989, "step": 2588 }, { "epoch": 1.482714193686923, "grad_norm": 0.3734433948993683, "learning_rate": 1.7250460576645595e-05, "loss": 2.7727, "step": 2590 }, { "epoch": 1.4838594230906879, "grad_norm": 0.40953153371810913, "learning_rate": 1.7178994651020958e-05, "loss": 2.8327, "step": 2592 }, { "epoch": 1.4850046524944527, "grad_norm": 0.4236595332622528, "learning_rate": 1.710764634967742e-05, "loss": 2.8549, "step": 2594 }, { "epoch": 1.4861498818982177, "grad_norm": 0.40529724955558777, "learning_rate": 1.7036415928313798e-05, "loss": 2.8287, "step": 2596 }, { "epoch": 1.4872951113019828, "grad_norm": 0.41257691383361816, "learning_rate": 1.6965303642206486e-05, "loss": 2.8027, "step": 2598 }, { "epoch": 1.4884403407057476, "grad_norm": 0.3748761713504791, "learning_rate": 1.6894309746208474e-05, "loss": 2.7456, "step": 2600 }, { "epoch": 1.4895855701095124, "grad_norm": 0.39214664697647095, "learning_rate": 1.6823434494748463e-05, "loss": 2.7459, "step": 2602 }, { "epoch": 1.4907307995132775, "grad_norm": 0.4161108434200287, "learning_rate": 1.6752678141829964e-05, "loss": 2.8085, "step": 2604 }, { "epoch": 1.4918760289170425, "grad_norm": 0.39803674817085266, "learning_rate": 1.668204094103036e-05, "loss": 2.8011, "step": 2606 }, { "epoch": 1.4930212583208073, "grad_norm": 0.4179908037185669, "learning_rate": 1.6611523145500058e-05, "loss": 2.8638, "step": 2608 }, { "epoch": 1.4941664877245724, "grad_norm": 0.4226784408092499, "learning_rate": 1.65411250079615e-05, "loss": 2.7662, "step": 2610 }, { "epoch": 1.4953117171283372, "grad_norm": 0.40659400820732117, "learning_rate": 1.6470846780708303e-05, "loss": 2.8241, "step": 2612 }, { "epoch": 1.4964569465321023, "grad_norm": 0.384813517332077, "learning_rate": 1.640068871560436e-05, "loss": 2.7765, "step": 2614 }, { "epoch": 1.497602175935867, "grad_norm": 0.38852763175964355, "learning_rate": 1.6330651064082913e-05, "loss": 2.8206, "step": 2616 }, { "epoch": 1.4987474053396321, "grad_norm": 0.38395917415618896, "learning_rate": 1.626073407714568e-05, "loss": 2.8097, "step": 2618 }, { "epoch": 1.499892634743397, "grad_norm": 0.3944055140018463, "learning_rate": 1.6190938005361917e-05, "loss": 2.7749, "step": 2620 }, { "epoch": 1.501037864147162, "grad_norm": 0.42734283208847046, "learning_rate": 1.6121263098867562e-05, "loss": 2.7747, "step": 2622 }, { "epoch": 1.502183093550927, "grad_norm": 0.3883204460144043, "learning_rate": 1.605170960736432e-05, "loss": 2.7929, "step": 2624 }, { "epoch": 1.5033283229546919, "grad_norm": 0.38834232091903687, "learning_rate": 1.5982277780118732e-05, "loss": 2.7402, "step": 2626 }, { "epoch": 1.5044735523584567, "grad_norm": 0.3826979696750641, "learning_rate": 1.5912967865961387e-05, "loss": 2.7687, "step": 2628 }, { "epoch": 1.5056187817622217, "grad_norm": 0.39274129271507263, "learning_rate": 1.5843780113285904e-05, "loss": 2.7995, "step": 2630 }, { "epoch": 1.5067640111659868, "grad_norm": 0.4306257963180542, "learning_rate": 1.5774714770048116e-05, "loss": 2.8761, "step": 2632 }, { "epoch": 1.5079092405697516, "grad_norm": 0.4022698402404785, "learning_rate": 1.5705772083765156e-05, "loss": 2.7287, "step": 2634 }, { "epoch": 1.5090544699735164, "grad_norm": 0.4072604179382324, "learning_rate": 1.5636952301514573e-05, "loss": 2.8105, "step": 2636 }, { "epoch": 1.5101996993772815, "grad_norm": 0.40660110116004944, "learning_rate": 1.5568255669933494e-05, "loss": 2.7828, "step": 2638 }, { "epoch": 1.5113449287810465, "grad_norm": 0.40150290727615356, "learning_rate": 1.5499682435217645e-05, "loss": 2.7701, "step": 2640 }, { "epoch": 1.5124901581848114, "grad_norm": 0.4095585346221924, "learning_rate": 1.5431232843120546e-05, "loss": 2.7758, "step": 2642 }, { "epoch": 1.5136353875885762, "grad_norm": 0.42853084206581116, "learning_rate": 1.536290713895259e-05, "loss": 2.818, "step": 2644 }, { "epoch": 1.5147806169923412, "grad_norm": 0.3920649290084839, "learning_rate": 1.529470556758019e-05, "loss": 2.7709, "step": 2646 }, { "epoch": 1.5159258463961063, "grad_norm": 0.40265950560569763, "learning_rate": 1.5226628373424911e-05, "loss": 2.8257, "step": 2648 }, { "epoch": 1.5170710757998713, "grad_norm": 0.39623868465423584, "learning_rate": 1.515867580046254e-05, "loss": 2.7479, "step": 2650 }, { "epoch": 1.5182163052036362, "grad_norm": 0.39726147055625916, "learning_rate": 1.5090848092222255e-05, "loss": 2.8129, "step": 2652 }, { "epoch": 1.519361534607401, "grad_norm": 0.4114530384540558, "learning_rate": 1.502314549178575e-05, "loss": 2.8031, "step": 2654 }, { "epoch": 1.520506764011166, "grad_norm": 0.40079519152641296, "learning_rate": 1.4955568241786328e-05, "loss": 2.7861, "step": 2656 }, { "epoch": 1.521651993414931, "grad_norm": 0.36807724833488464, "learning_rate": 1.4888116584408123e-05, "loss": 2.7981, "step": 2658 }, { "epoch": 1.522797222818696, "grad_norm": 0.41362595558166504, "learning_rate": 1.4820790761385106e-05, "loss": 2.769, "step": 2660 }, { "epoch": 1.5239424522224607, "grad_norm": 0.3776959478855133, "learning_rate": 1.4753591014000312e-05, "loss": 2.8331, "step": 2662 }, { "epoch": 1.5250876816262258, "grad_norm": 0.4130830466747284, "learning_rate": 1.468651758308493e-05, "loss": 2.8113, "step": 2664 }, { "epoch": 1.5262329110299908, "grad_norm": 0.4188506007194519, "learning_rate": 1.4619570709017461e-05, "loss": 2.76, "step": 2666 }, { "epoch": 1.5273781404337556, "grad_norm": 0.40274086594581604, "learning_rate": 1.4552750631722861e-05, "loss": 2.731, "step": 2668 }, { "epoch": 1.5285233698375205, "grad_norm": 0.41274294257164, "learning_rate": 1.4486057590671654e-05, "loss": 2.8248, "step": 2670 }, { "epoch": 1.5296685992412855, "grad_norm": 0.40115487575531006, "learning_rate": 1.4419491824879089e-05, "loss": 2.8578, "step": 2672 }, { "epoch": 1.5308138286450506, "grad_norm": 0.4179574251174927, "learning_rate": 1.4353053572904291e-05, "loss": 2.752, "step": 2674 }, { "epoch": 1.5319590580488154, "grad_norm": 0.39704403281211853, "learning_rate": 1.4286743072849424e-05, "loss": 2.807, "step": 2676 }, { "epoch": 1.5331042874525802, "grad_norm": 0.39814841747283936, "learning_rate": 1.422056056235877e-05, "loss": 2.7909, "step": 2678 }, { "epoch": 1.5342495168563453, "grad_norm": 0.3912595510482788, "learning_rate": 1.4154506278617947e-05, "loss": 2.7471, "step": 2680 }, { "epoch": 1.5353947462601103, "grad_norm": 0.41936007142066956, "learning_rate": 1.4088580458353024e-05, "loss": 2.8128, "step": 2682 }, { "epoch": 1.5365399756638751, "grad_norm": 0.40823835134506226, "learning_rate": 1.4022783337829664e-05, "loss": 2.8238, "step": 2684 }, { "epoch": 1.53768520506764, "grad_norm": 0.41865071654319763, "learning_rate": 1.3957115152852352e-05, "loss": 2.7949, "step": 2686 }, { "epoch": 1.538830434471405, "grad_norm": 0.4251275658607483, "learning_rate": 1.389157613876343e-05, "loss": 2.7633, "step": 2688 }, { "epoch": 1.53997566387517, "grad_norm": 0.4164314568042755, "learning_rate": 1.3826166530442347e-05, "loss": 2.7984, "step": 2690 }, { "epoch": 1.541120893278935, "grad_norm": 0.40024682879447937, "learning_rate": 1.3760886562304776e-05, "loss": 2.7193, "step": 2692 }, { "epoch": 1.5422661226827, "grad_norm": 0.42088523507118225, "learning_rate": 1.3695736468301767e-05, "loss": 2.8136, "step": 2694 }, { "epoch": 1.5434113520864647, "grad_norm": 0.37311938405036926, "learning_rate": 1.3630716481918987e-05, "loss": 2.7978, "step": 2696 }, { "epoch": 1.5445565814902298, "grad_norm": 0.413992315530777, "learning_rate": 1.3565826836175754e-05, "loss": 2.8129, "step": 2698 }, { "epoch": 1.5457018108939948, "grad_norm": 0.39701685309410095, "learning_rate": 1.3501067763624303e-05, "loss": 2.8128, "step": 2700 }, { "epoch": 1.5468470402977597, "grad_norm": 0.38475367426872253, "learning_rate": 1.3436439496348912e-05, "loss": 2.8471, "step": 2702 }, { "epoch": 1.5479922697015245, "grad_norm": 0.3970561921596527, "learning_rate": 1.3371942265965065e-05, "loss": 2.7832, "step": 2704 }, { "epoch": 1.5491374991052895, "grad_norm": 0.3974805772304535, "learning_rate": 1.3307576303618674e-05, "loss": 2.7273, "step": 2706 }, { "epoch": 1.5502827285090546, "grad_norm": 0.4151800572872162, "learning_rate": 1.3243341839985174e-05, "loss": 2.8143, "step": 2708 }, { "epoch": 1.5514279579128194, "grad_norm": 0.4361136555671692, "learning_rate": 1.3179239105268737e-05, "loss": 2.779, "step": 2710 }, { "epoch": 1.5525731873165842, "grad_norm": 0.3736790120601654, "learning_rate": 1.3115268329201458e-05, "loss": 2.8302, "step": 2712 }, { "epoch": 1.5537184167203493, "grad_norm": 0.38528764247894287, "learning_rate": 1.305142974104251e-05, "loss": 2.7994, "step": 2714 }, { "epoch": 1.5548636461241143, "grad_norm": 0.43212321400642395, "learning_rate": 1.2987723569577326e-05, "loss": 2.8126, "step": 2716 }, { "epoch": 1.5560088755278791, "grad_norm": 0.3648909628391266, "learning_rate": 1.2924150043116785e-05, "loss": 2.6923, "step": 2718 }, { "epoch": 1.557154104931644, "grad_norm": 0.39656099677085876, "learning_rate": 1.28607093894964e-05, "loss": 2.7913, "step": 2720 }, { "epoch": 1.558299334335409, "grad_norm": 0.39519429206848145, "learning_rate": 1.2797401836075478e-05, "loss": 2.8459, "step": 2722 }, { "epoch": 1.559444563739174, "grad_norm": 0.37746697664260864, "learning_rate": 1.2734227609736321e-05, "loss": 2.8168, "step": 2724 }, { "epoch": 1.560589793142939, "grad_norm": 0.4118252992630005, "learning_rate": 1.2671186936883434e-05, "loss": 2.7415, "step": 2726 }, { "epoch": 1.561735022546704, "grad_norm": 0.3994660973548889, "learning_rate": 1.2608280043442678e-05, "loss": 2.7259, "step": 2728 }, { "epoch": 1.5628802519504688, "grad_norm": 0.4043809771537781, "learning_rate": 1.2545507154860458e-05, "loss": 2.8936, "step": 2730 }, { "epoch": 1.5640254813542338, "grad_norm": 0.4162848889827728, "learning_rate": 1.2482868496102951e-05, "loss": 2.8088, "step": 2732 }, { "epoch": 1.5651707107579989, "grad_norm": 0.3678242564201355, "learning_rate": 1.2420364291655262e-05, "loss": 2.8305, "step": 2734 }, { "epoch": 1.5663159401617637, "grad_norm": 0.4160403311252594, "learning_rate": 1.2357994765520669e-05, "loss": 2.7427, "step": 2736 }, { "epoch": 1.5674611695655285, "grad_norm": 0.35816919803619385, "learning_rate": 1.229576014121976e-05, "loss": 2.7657, "step": 2738 }, { "epoch": 1.5686063989692935, "grad_norm": 0.3832671046257019, "learning_rate": 1.2233660641789663e-05, "loss": 2.8167, "step": 2740 }, { "epoch": 1.5697516283730586, "grad_norm": 0.3768201768398285, "learning_rate": 1.2171696489783257e-05, "loss": 2.7964, "step": 2742 }, { "epoch": 1.5708968577768234, "grad_norm": 0.3857521116733551, "learning_rate": 1.2109867907268335e-05, "loss": 2.8137, "step": 2744 }, { "epoch": 1.5720420871805882, "grad_norm": 0.3764081299304962, "learning_rate": 1.2048175115826876e-05, "loss": 2.7547, "step": 2746 }, { "epoch": 1.5731873165843533, "grad_norm": 0.37275439500808716, "learning_rate": 1.1986618336554183e-05, "loss": 2.8211, "step": 2748 }, { "epoch": 1.5743325459881183, "grad_norm": 0.3680013120174408, "learning_rate": 1.1925197790058112e-05, "loss": 2.8078, "step": 2750 }, { "epoch": 1.5754777753918832, "grad_norm": 0.3640711307525635, "learning_rate": 1.1863913696458306e-05, "loss": 2.7808, "step": 2752 }, { "epoch": 1.576623004795648, "grad_norm": 0.3780917227268219, "learning_rate": 1.1802766275385346e-05, "loss": 2.7862, "step": 2754 }, { "epoch": 1.577768234199413, "grad_norm": 0.3930152952671051, "learning_rate": 1.1741755745980077e-05, "loss": 2.8192, "step": 2756 }, { "epoch": 1.578913463603178, "grad_norm": 0.38377946615219116, "learning_rate": 1.1680882326892684e-05, "loss": 2.778, "step": 2758 }, { "epoch": 1.580058693006943, "grad_norm": 0.38965606689453125, "learning_rate": 1.1620146236282004e-05, "loss": 2.7755, "step": 2760 }, { "epoch": 1.5812039224107077, "grad_norm": 0.372148334980011, "learning_rate": 1.1559547691814709e-05, "loss": 2.796, "step": 2762 }, { "epoch": 1.5823491518144728, "grad_norm": 0.390490859746933, "learning_rate": 1.1499086910664525e-05, "loss": 2.8439, "step": 2764 }, { "epoch": 1.5834943812182378, "grad_norm": 0.38053470849990845, "learning_rate": 1.1438764109511468e-05, "loss": 2.8045, "step": 2766 }, { "epoch": 1.5846396106220029, "grad_norm": 0.38410815596580505, "learning_rate": 1.137857950454106e-05, "loss": 2.7785, "step": 2768 }, { "epoch": 1.5857848400257677, "grad_norm": 0.4017854332923889, "learning_rate": 1.1318533311443558e-05, "loss": 2.8288, "step": 2770 }, { "epoch": 1.5869300694295325, "grad_norm": 0.3886673152446747, "learning_rate": 1.1258625745413159e-05, "loss": 2.8194, "step": 2772 }, { "epoch": 1.5880752988332976, "grad_norm": 0.3889922797679901, "learning_rate": 1.1198857021147258e-05, "loss": 2.8282, "step": 2774 }, { "epoch": 1.5892205282370626, "grad_norm": 0.3878893554210663, "learning_rate": 1.1139227352845688e-05, "loss": 2.8065, "step": 2776 }, { "epoch": 1.5903657576408274, "grad_norm": 0.3793700337409973, "learning_rate": 1.1079736954209902e-05, "loss": 2.8089, "step": 2778 }, { "epoch": 1.5915109870445923, "grad_norm": 0.3874254524707794, "learning_rate": 1.1020386038442243e-05, "loss": 2.7482, "step": 2780 }, { "epoch": 1.5926562164483573, "grad_norm": 0.4027820825576782, "learning_rate": 1.0961174818245184e-05, "loss": 2.777, "step": 2782 }, { "epoch": 1.5938014458521224, "grad_norm": 0.41898322105407715, "learning_rate": 1.0902103505820532e-05, "loss": 2.7744, "step": 2784 }, { "epoch": 1.5949466752558872, "grad_norm": 0.3971405327320099, "learning_rate": 1.0843172312868737e-05, "loss": 2.8676, "step": 2786 }, { "epoch": 1.596091904659652, "grad_norm": 0.40912675857543945, "learning_rate": 1.0784381450588037e-05, "loss": 2.7838, "step": 2788 }, { "epoch": 1.597237134063417, "grad_norm": 0.39409947395324707, "learning_rate": 1.0725731129673788e-05, "loss": 2.7539, "step": 2790 }, { "epoch": 1.598382363467182, "grad_norm": 0.37693777680397034, "learning_rate": 1.0667221560317642e-05, "loss": 2.8002, "step": 2792 }, { "epoch": 1.599527592870947, "grad_norm": 0.3971855938434601, "learning_rate": 1.0608852952206838e-05, "loss": 2.7762, "step": 2794 }, { "epoch": 1.6006728222747117, "grad_norm": 0.38710400462150574, "learning_rate": 1.0550625514523454e-05, "loss": 2.8663, "step": 2796 }, { "epoch": 1.6018180516784768, "grad_norm": 0.38142094016075134, "learning_rate": 1.0492539455943617e-05, "loss": 2.7898, "step": 2798 }, { "epoch": 1.6029632810822418, "grad_norm": 0.3825976252555847, "learning_rate": 1.0434594984636787e-05, "loss": 2.8145, "step": 2800 }, { "epoch": 1.6041085104860069, "grad_norm": 0.35264310240745544, "learning_rate": 1.0376792308264988e-05, "loss": 2.7831, "step": 2802 }, { "epoch": 1.6052537398897717, "grad_norm": 0.39446234703063965, "learning_rate": 1.0319131633982088e-05, "loss": 2.791, "step": 2804 }, { "epoch": 1.6063989692935365, "grad_norm": 0.3639817535877228, "learning_rate": 1.0261613168433071e-05, "loss": 2.8079, "step": 2806 }, { "epoch": 1.6075441986973016, "grad_norm": 0.38718488812446594, "learning_rate": 1.0204237117753235e-05, "loss": 2.8517, "step": 2808 }, { "epoch": 1.6086894281010666, "grad_norm": 0.3712478280067444, "learning_rate": 1.0147003687567497e-05, "loss": 2.8061, "step": 2810 }, { "epoch": 1.6098346575048315, "grad_norm": 0.3503079414367676, "learning_rate": 1.0089913082989666e-05, "loss": 2.7926, "step": 2812 }, { "epoch": 1.6109798869085963, "grad_norm": 0.37064117193222046, "learning_rate": 1.0032965508621672e-05, "loss": 2.7472, "step": 2814 }, { "epoch": 1.6121251163123613, "grad_norm": 0.3703208863735199, "learning_rate": 9.976161168552866e-06, "loss": 2.7534, "step": 2816 }, { "epoch": 1.6132703457161264, "grad_norm": 0.3922175467014313, "learning_rate": 9.919500266359267e-06, "loss": 2.8032, "step": 2818 }, { "epoch": 1.6144155751198912, "grad_norm": 0.3689518868923187, "learning_rate": 9.862983005102844e-06, "loss": 2.7705, "step": 2820 }, { "epoch": 1.615560804523656, "grad_norm": 0.3845526874065399, "learning_rate": 9.806609587330774e-06, "loss": 2.7804, "step": 2822 }, { "epoch": 1.616706033927421, "grad_norm": 0.37657397985458374, "learning_rate": 9.750380215074728e-06, "loss": 2.7891, "step": 2824 }, { "epoch": 1.6178512633311861, "grad_norm": 0.3700743317604065, "learning_rate": 9.694295089850175e-06, "loss": 2.8107, "step": 2826 }, { "epoch": 1.618996492734951, "grad_norm": 0.3766295313835144, "learning_rate": 9.638354412655587e-06, "loss": 2.7892, "step": 2828 }, { "epoch": 1.6201417221387158, "grad_norm": 0.38320571184158325, "learning_rate": 9.58255838397179e-06, "loss": 2.8108, "step": 2830 }, { "epoch": 1.6212869515424808, "grad_norm": 0.371046781539917, "learning_rate": 9.526907203761198e-06, "loss": 2.7906, "step": 2832 }, { "epoch": 1.6224321809462459, "grad_norm": 0.3817175626754761, "learning_rate": 9.471401071467117e-06, "loss": 2.8284, "step": 2834 }, { "epoch": 1.6235774103500107, "grad_norm": 0.36283162236213684, "learning_rate": 9.416040186013059e-06, "loss": 2.8706, "step": 2836 }, { "epoch": 1.6247226397537757, "grad_norm": 0.37132787704467773, "learning_rate": 9.360824745801965e-06, "loss": 2.7586, "step": 2838 }, { "epoch": 1.6258678691575406, "grad_norm": 0.36096081137657166, "learning_rate": 9.305754948715545e-06, "loss": 2.7966, "step": 2840 }, { "epoch": 1.6270130985613056, "grad_norm": 0.36539822816848755, "learning_rate": 9.250830992113535e-06, "loss": 2.7724, "step": 2842 }, { "epoch": 1.6281583279650707, "grad_norm": 0.36889728903770447, "learning_rate": 9.19605307283301e-06, "loss": 2.8821, "step": 2844 }, { "epoch": 1.6293035573688355, "grad_norm": 0.36020317673683167, "learning_rate": 9.141421387187704e-06, "loss": 2.7614, "step": 2846 }, { "epoch": 1.6304487867726003, "grad_norm": 0.3701307475566864, "learning_rate": 9.086936130967238e-06, "loss": 2.7744, "step": 2848 }, { "epoch": 1.6315940161763653, "grad_norm": 0.3817855417728424, "learning_rate": 9.03259749943649e-06, "loss": 2.8011, "step": 2850 }, { "epoch": 1.6327392455801304, "grad_norm": 0.36633580923080444, "learning_rate": 8.978405687334818e-06, "loss": 2.8033, "step": 2852 }, { "epoch": 1.6338844749838952, "grad_norm": 0.36279794573783875, "learning_rate": 8.924360888875466e-06, "loss": 2.7651, "step": 2854 }, { "epoch": 1.63502970438766, "grad_norm": 0.37279778718948364, "learning_rate": 8.870463297744774e-06, "loss": 2.7504, "step": 2856 }, { "epoch": 1.636174933791425, "grad_norm": 0.3858235776424408, "learning_rate": 8.816713107101514e-06, "loss": 2.7774, "step": 2858 }, { "epoch": 1.6373201631951901, "grad_norm": 0.3693833649158478, "learning_rate": 8.763110509576217e-06, "loss": 2.7706, "step": 2860 }, { "epoch": 1.638465392598955, "grad_norm": 0.3637385070323944, "learning_rate": 8.709655697270464e-06, "loss": 2.7455, "step": 2862 }, { "epoch": 1.6396106220027198, "grad_norm": 0.6835874319076538, "learning_rate": 8.656348861756198e-06, "loss": 2.7078, "step": 2864 }, { "epoch": 1.6407558514064848, "grad_norm": 0.36875632405281067, "learning_rate": 8.603190194075045e-06, "loss": 2.8309, "step": 2866 }, { "epoch": 1.6419010808102499, "grad_norm": 0.348848819732666, "learning_rate": 8.55017988473763e-06, "loss": 2.7738, "step": 2868 }, { "epoch": 1.6430463102140147, "grad_norm": 0.3683876395225525, "learning_rate": 8.497318123722875e-06, "loss": 2.8374, "step": 2870 }, { "epoch": 1.6441915396177795, "grad_norm": 0.3629082143306732, "learning_rate": 8.444605100477338e-06, "loss": 2.7796, "step": 2872 }, { "epoch": 1.6453367690215446, "grad_norm": 0.35974109172821045, "learning_rate": 8.392041003914552e-06, "loss": 2.7879, "step": 2874 }, { "epoch": 1.6464819984253096, "grad_norm": 0.35452204942703247, "learning_rate": 8.339626022414304e-06, "loss": 2.7745, "step": 2876 }, { "epoch": 1.6476272278290747, "grad_norm": 0.3606550991535187, "learning_rate": 8.287360343821971e-06, "loss": 2.7867, "step": 2878 }, { "epoch": 1.6487724572328395, "grad_norm": 0.3575342297554016, "learning_rate": 8.235244155447875e-06, "loss": 2.7954, "step": 2880 }, { "epoch": 1.6499176866366043, "grad_norm": 0.35074177384376526, "learning_rate": 8.183277644066572e-06, "loss": 2.7192, "step": 2882 }, { "epoch": 1.6510629160403694, "grad_norm": 0.3556416630744934, "learning_rate": 8.131460995916246e-06, "loss": 2.7754, "step": 2884 }, { "epoch": 1.6522081454441344, "grad_norm": 0.3557741940021515, "learning_rate": 8.079794396697959e-06, "loss": 2.8035, "step": 2886 }, { "epoch": 1.6533533748478992, "grad_norm": 0.346952348947525, "learning_rate": 8.028278031575043e-06, "loss": 2.7475, "step": 2888 }, { "epoch": 1.654498604251664, "grad_norm": 0.3559010326862335, "learning_rate": 7.976912085172406e-06, "loss": 2.7738, "step": 2890 }, { "epoch": 1.655643833655429, "grad_norm": 0.3678223490715027, "learning_rate": 7.925696741575889e-06, "loss": 2.7635, "step": 2892 }, { "epoch": 1.6567890630591942, "grad_norm": 0.3549870550632477, "learning_rate": 7.874632184331632e-06, "loss": 2.8328, "step": 2894 }, { "epoch": 1.657934292462959, "grad_norm": 0.5636059045791626, "learning_rate": 7.82371859644534e-06, "loss": 2.8042, "step": 2896 }, { "epoch": 1.6590795218667238, "grad_norm": 0.3788129687309265, "learning_rate": 7.772956160381695e-06, "loss": 2.8311, "step": 2898 }, { "epoch": 1.6602247512704889, "grad_norm": 0.3805330693721771, "learning_rate": 7.722345058063668e-06, "loss": 2.7599, "step": 2900 }, { "epoch": 1.661369980674254, "grad_norm": 0.3647625148296356, "learning_rate": 7.671885470871888e-06, "loss": 2.8458, "step": 2902 }, { "epoch": 1.6625152100780187, "grad_norm": 0.3516094386577606, "learning_rate": 7.62157757964399e-06, "loss": 2.8048, "step": 2904 }, { "epoch": 1.6636604394817835, "grad_norm": 0.3611275851726532, "learning_rate": 7.571421564673948e-06, "loss": 2.7953, "step": 2906 }, { "epoch": 1.6648056688855486, "grad_norm": 0.35804450511932373, "learning_rate": 7.521417605711434e-06, "loss": 2.7368, "step": 2908 }, { "epoch": 1.6659508982893136, "grad_norm": 0.35790038108825684, "learning_rate": 7.471565881961195e-06, "loss": 2.7388, "step": 2910 }, { "epoch": 1.6670961276930785, "grad_norm": 0.35643112659454346, "learning_rate": 7.421866572082375e-06, "loss": 2.7573, "step": 2912 }, { "epoch": 1.6682413570968435, "grad_norm": 0.3534427285194397, "learning_rate": 7.372319854187926e-06, "loss": 2.8009, "step": 2914 }, { "epoch": 1.6693865865006083, "grad_norm": 0.3942296504974365, "learning_rate": 7.322925905843919e-06, "loss": 2.8384, "step": 2916 }, { "epoch": 1.6705318159043734, "grad_norm": 0.34432345628738403, "learning_rate": 7.273684904068928e-06, "loss": 2.7927, "step": 2918 }, { "epoch": 1.6716770453081384, "grad_norm": 0.3574564456939697, "learning_rate": 7.224597025333396e-06, "loss": 2.8061, "step": 2920 }, { "epoch": 1.6728222747119033, "grad_norm": 0.3473871052265167, "learning_rate": 7.175662445559012e-06, "loss": 2.7812, "step": 2922 }, { "epoch": 1.673967504115668, "grad_norm": 0.3656257688999176, "learning_rate": 7.126881340118053e-06, "loss": 2.7909, "step": 2924 }, { "epoch": 1.6751127335194331, "grad_norm": 0.3552693724632263, "learning_rate": 7.078253883832786e-06, "loss": 2.7946, "step": 2926 }, { "epoch": 1.6762579629231982, "grad_norm": 0.3596632778644562, "learning_rate": 7.029780250974827e-06, "loss": 2.7902, "step": 2928 }, { "epoch": 1.677403192326963, "grad_norm": 0.36203533411026, "learning_rate": 6.981460615264518e-06, "loss": 2.8162, "step": 2930 }, { "epoch": 1.6785484217307278, "grad_norm": 0.35209399461746216, "learning_rate": 6.933295149870289e-06, "loss": 2.7921, "step": 2932 }, { "epoch": 1.6796936511344929, "grad_norm": 0.37425243854522705, "learning_rate": 6.88528402740809e-06, "loss": 2.8408, "step": 2934 }, { "epoch": 1.680838880538258, "grad_norm": 0.35894691944122314, "learning_rate": 6.837427419940701e-06, "loss": 2.805, "step": 2936 }, { "epoch": 1.6819841099420227, "grad_norm": 0.37119370698928833, "learning_rate": 6.789725498977151e-06, "loss": 2.8534, "step": 2938 }, { "epoch": 1.6831293393457876, "grad_norm": 0.36945095658302307, "learning_rate": 6.742178435472124e-06, "loss": 2.7411, "step": 2940 }, { "epoch": 1.6842745687495526, "grad_norm": 0.3619667589664459, "learning_rate": 6.694786399825292e-06, "loss": 2.8024, "step": 2942 }, { "epoch": 1.6854197981533177, "grad_norm": 0.36520203948020935, "learning_rate": 6.647549561880778e-06, "loss": 2.8327, "step": 2944 }, { "epoch": 1.6865650275570825, "grad_norm": 0.3525446951389313, "learning_rate": 6.600468090926476e-06, "loss": 2.781, "step": 2946 }, { "epoch": 1.6877102569608473, "grad_norm": 0.36201027035713196, "learning_rate": 6.553542155693476e-06, "loss": 2.7936, "step": 2948 }, { "epoch": 1.6888554863646124, "grad_norm": 0.36204794049263, "learning_rate": 6.5067719243554645e-06, "loss": 2.8075, "step": 2950 }, { "epoch": 1.6900007157683774, "grad_norm": 0.3580401539802551, "learning_rate": 6.460157564528102e-06, "loss": 2.7621, "step": 2952 }, { "epoch": 1.6911459451721425, "grad_norm": 0.375422865152359, "learning_rate": 6.413699243268467e-06, "loss": 2.801, "step": 2954 }, { "epoch": 1.6922911745759073, "grad_norm": 0.3734365403652191, "learning_rate": 6.367397127074393e-06, "loss": 2.7879, "step": 2956 }, { "epoch": 1.693436403979672, "grad_norm": 0.34999606013298035, "learning_rate": 6.321251381883913e-06, "loss": 2.7559, "step": 2958 }, { "epoch": 1.6945816333834371, "grad_norm": 0.35819539427757263, "learning_rate": 6.275262173074664e-06, "loss": 2.8245, "step": 2960 }, { "epoch": 1.6957268627872022, "grad_norm": 0.3607465326786041, "learning_rate": 6.22942966546326e-06, "loss": 2.8005, "step": 2962 }, { "epoch": 1.696872092190967, "grad_norm": 0.35205408930778503, "learning_rate": 6.1837540233047775e-06, "loss": 2.8301, "step": 2964 }, { "epoch": 1.6980173215947318, "grad_norm": 0.3466891348361969, "learning_rate": 6.13823541029207e-06, "loss": 2.8944, "step": 2966 }, { "epoch": 1.699162550998497, "grad_norm": 0.35719573497772217, "learning_rate": 6.092873989555253e-06, "loss": 2.8451, "step": 2968 }, { "epoch": 1.700307780402262, "grad_norm": 0.3446790874004364, "learning_rate": 6.047669923661075e-06, "loss": 2.7538, "step": 2970 }, { "epoch": 1.7014530098060268, "grad_norm": 0.36106982827186584, "learning_rate": 6.00262337461237e-06, "loss": 2.791, "step": 2972 }, { "epoch": 1.7025982392097916, "grad_norm": 0.3367460072040558, "learning_rate": 5.9577345038474675e-06, "loss": 2.7467, "step": 2974 }, { "epoch": 1.7037434686135566, "grad_norm": 0.35987377166748047, "learning_rate": 5.9130034722395865e-06, "loss": 2.8188, "step": 2976 }, { "epoch": 1.7048886980173217, "grad_norm": 0.3397689461708069, "learning_rate": 5.868430440096296e-06, "loss": 2.8115, "step": 2978 }, { "epoch": 1.7060339274210865, "grad_norm": 0.3493924140930176, "learning_rate": 5.8240155671589134e-06, "loss": 2.785, "step": 2980 }, { "epoch": 1.7071791568248513, "grad_norm": 0.34710371494293213, "learning_rate": 5.779759012601954e-06, "loss": 2.816, "step": 2982 }, { "epoch": 1.7083243862286164, "grad_norm": 0.36267027258872986, "learning_rate": 5.735660935032555e-06, "loss": 2.7945, "step": 2984 }, { "epoch": 1.7094696156323814, "grad_norm": 0.3607076108455658, "learning_rate": 5.691721492489888e-06, "loss": 2.842, "step": 2986 }, { "epoch": 1.7106148450361462, "grad_norm": 0.3613309860229492, "learning_rate": 5.6479408424446104e-06, "loss": 2.7765, "step": 2988 }, { "epoch": 1.7117600744399113, "grad_norm": 0.34058722853660583, "learning_rate": 5.604319141798303e-06, "loss": 2.7905, "step": 2990 }, { "epoch": 1.7129053038436761, "grad_norm": 0.33581697940826416, "learning_rate": 5.560856546882881e-06, "loss": 2.8228, "step": 2992 }, { "epoch": 1.7140505332474412, "grad_norm": 0.3725946843624115, "learning_rate": 5.517553213460097e-06, "loss": 2.7483, "step": 2994 }, { "epoch": 1.7151957626512062, "grad_norm": 0.35477662086486816, "learning_rate": 5.474409296720901e-06, "loss": 2.8139, "step": 2996 }, { "epoch": 1.716340992054971, "grad_norm": 0.3297097682952881, "learning_rate": 5.431424951284942e-06, "loss": 2.7894, "step": 2998 }, { "epoch": 1.7174862214587359, "grad_norm": 0.352491170167923, "learning_rate": 5.388600331199989e-06, "loss": 2.8531, "step": 3000 }, { "epoch": 1.718631450862501, "grad_norm": 0.3736025393009186, "learning_rate": 5.345935589941376e-06, "loss": 2.7945, "step": 3002 }, { "epoch": 1.719776680266266, "grad_norm": 0.35439836978912354, "learning_rate": 5.303430880411497e-06, "loss": 2.7928, "step": 3004 }, { "epoch": 1.7209219096700308, "grad_norm": 0.3541104197502136, "learning_rate": 5.261086354939193e-06, "loss": 2.8003, "step": 3006 }, { "epoch": 1.7220671390737956, "grad_norm": 0.35658323764801025, "learning_rate": 5.218902165279232e-06, "loss": 2.7813, "step": 3008 }, { "epoch": 1.7232123684775607, "grad_norm": 0.363407701253891, "learning_rate": 5.1768784626117835e-06, "loss": 2.8683, "step": 3010 }, { "epoch": 1.7243575978813257, "grad_norm": 0.3532641530036926, "learning_rate": 5.135015397541848e-06, "loss": 2.8588, "step": 3012 }, { "epoch": 1.7255028272850905, "grad_norm": 0.3381511867046356, "learning_rate": 5.093313120098752e-06, "loss": 2.7717, "step": 3014 }, { "epoch": 1.7266480566888553, "grad_norm": 0.366940975189209, "learning_rate": 5.051771779735581e-06, "loss": 2.7712, "step": 3016 }, { "epoch": 1.7277932860926204, "grad_norm": 0.3711313009262085, "learning_rate": 5.0103915253286395e-06, "loss": 2.8563, "step": 3018 }, { "epoch": 1.7289385154963854, "grad_norm": 0.3527882397174835, "learning_rate": 4.969172505176939e-06, "loss": 2.6896, "step": 3020 }, { "epoch": 1.7300837449001503, "grad_norm": 0.36136162281036377, "learning_rate": 4.928114867001665e-06, "loss": 2.768, "step": 3022 }, { "epoch": 1.731228974303915, "grad_norm": 0.3595525920391083, "learning_rate": 4.887218757945633e-06, "loss": 2.7676, "step": 3024 }, { "epoch": 1.7323742037076801, "grad_norm": 0.35122060775756836, "learning_rate": 4.846484324572764e-06, "loss": 2.7391, "step": 3026 }, { "epoch": 1.7335194331114452, "grad_norm": 0.35008570551872253, "learning_rate": 4.805911712867572e-06, "loss": 2.7526, "step": 3028 }, { "epoch": 1.7346646625152102, "grad_norm": 0.35732609033584595, "learning_rate": 4.7655010682346345e-06, "loss": 2.8349, "step": 3030 }, { "epoch": 1.735809891918975, "grad_norm": 0.3600618839263916, "learning_rate": 4.7252525354980534e-06, "loss": 2.8164, "step": 3032 }, { "epoch": 1.7369551213227399, "grad_norm": 0.342807412147522, "learning_rate": 4.68516625890098e-06, "loss": 2.7588, "step": 3034 }, { "epoch": 1.738100350726505, "grad_norm": 0.3507077097892761, "learning_rate": 4.645242382105053e-06, "loss": 2.8033, "step": 3036 }, { "epoch": 1.73924558013027, "grad_norm": 0.3395911455154419, "learning_rate": 4.605481048189897e-06, "loss": 2.7585, "step": 3038 }, { "epoch": 1.7403908095340348, "grad_norm": 0.3708266615867615, "learning_rate": 4.56588239965261e-06, "loss": 2.8184, "step": 3040 }, { "epoch": 1.7415360389377996, "grad_norm": 0.341718852519989, "learning_rate": 4.52644657840729e-06, "loss": 2.7807, "step": 3042 }, { "epoch": 1.7426812683415647, "grad_norm": 0.34745344519615173, "learning_rate": 4.487173725784449e-06, "loss": 2.7769, "step": 3044 }, { "epoch": 1.7438264977453297, "grad_norm": 0.3659815490245819, "learning_rate": 4.448063982530576e-06, "loss": 2.7575, "step": 3046 }, { "epoch": 1.7449717271490945, "grad_norm": 0.3450164794921875, "learning_rate": 4.409117488807602e-06, "loss": 2.8391, "step": 3048 }, { "epoch": 1.7461169565528594, "grad_norm": 0.34733206033706665, "learning_rate": 4.370334384192381e-06, "loss": 2.7326, "step": 3050 }, { "epoch": 1.7472621859566244, "grad_norm": 0.34988871216773987, "learning_rate": 4.331714807676268e-06, "loss": 2.7968, "step": 3052 }, { "epoch": 1.7484074153603895, "grad_norm": 0.3481846749782562, "learning_rate": 4.293258897664504e-06, "loss": 2.7678, "step": 3054 }, { "epoch": 1.7495526447641543, "grad_norm": 0.3538358509540558, "learning_rate": 4.254966791975806e-06, "loss": 2.8065, "step": 3056 }, { "epoch": 1.750697874167919, "grad_norm": 0.35168197751045227, "learning_rate": 4.216838627841846e-06, "loss": 2.8137, "step": 3058 }, { "epoch": 1.7518431035716842, "grad_norm": 0.34234529733657837, "learning_rate": 4.178874541906752e-06, "loss": 2.7366, "step": 3060 }, { "epoch": 1.7529883329754492, "grad_norm": 0.34907153248786926, "learning_rate": 4.141074670226646e-06, "loss": 2.7789, "step": 3062 }, { "epoch": 1.7541335623792143, "grad_norm": 0.3454132080078125, "learning_rate": 4.103439148269128e-06, "loss": 2.8062, "step": 3064 }, { "epoch": 1.755278791782979, "grad_norm": 0.3408695161342621, "learning_rate": 4.0659681109127875e-06, "loss": 2.6874, "step": 3066 }, { "epoch": 1.756424021186744, "grad_norm": 0.3572022020816803, "learning_rate": 4.028661692446739e-06, "loss": 2.7999, "step": 3068 }, { "epoch": 1.757569250590509, "grad_norm": 0.340815931558609, "learning_rate": 3.99152002657015e-06, "loss": 2.8073, "step": 3070 }, { "epoch": 1.758714479994274, "grad_norm": 0.36942556500434875, "learning_rate": 3.9545432463917175e-06, "loss": 2.7673, "step": 3072 }, { "epoch": 1.7598597093980388, "grad_norm": 0.3383021056652069, "learning_rate": 3.917731484429243e-06, "loss": 2.8229, "step": 3074 }, { "epoch": 1.7610049388018036, "grad_norm": 0.34019801020622253, "learning_rate": 3.88108487260912e-06, "loss": 2.8506, "step": 3076 }, { "epoch": 1.7621501682055687, "grad_norm": 0.34177881479263306, "learning_rate": 3.844603542265884e-06, "loss": 2.7634, "step": 3078 }, { "epoch": 1.7632953976093337, "grad_norm": 0.3580133020877838, "learning_rate": 3.808287624141721e-06, "loss": 2.8074, "step": 3080 }, { "epoch": 1.7644406270130986, "grad_norm": 0.3615207374095917, "learning_rate": 3.7721372483860372e-06, "loss": 2.8447, "step": 3082 }, { "epoch": 1.7655858564168634, "grad_norm": 0.3536333441734314, "learning_rate": 3.736152544554933e-06, "loss": 2.7907, "step": 3084 }, { "epoch": 1.7667310858206284, "grad_norm": 0.3504035770893097, "learning_rate": 3.700333641610798e-06, "loss": 2.7329, "step": 3086 }, { "epoch": 1.7678763152243935, "grad_norm": 0.33401525020599365, "learning_rate": 3.6646806679217994e-06, "loss": 2.7288, "step": 3088 }, { "epoch": 1.7690215446281583, "grad_norm": 0.3418073356151581, "learning_rate": 3.629193751261456e-06, "loss": 2.8122, "step": 3090 }, { "epoch": 1.7701667740319231, "grad_norm": 0.3692437410354614, "learning_rate": 3.593873018808186e-06, "loss": 2.804, "step": 3092 }, { "epoch": 1.7713120034356882, "grad_norm": 0.33674415946006775, "learning_rate": 3.5587185971448167e-06, "loss": 2.7605, "step": 3094 }, { "epoch": 1.7724572328394532, "grad_norm": 0.35134121775627136, "learning_rate": 3.5237306122581417e-06, "loss": 2.8305, "step": 3096 }, { "epoch": 1.773602462243218, "grad_norm": 0.35533973574638367, "learning_rate": 3.4889091895384973e-06, "loss": 2.7526, "step": 3098 }, { "epoch": 1.7747476916469829, "grad_norm": 0.3479813039302826, "learning_rate": 3.454254453779271e-06, "loss": 2.7909, "step": 3100 }, { "epoch": 1.775892921050748, "grad_norm": 0.33563852310180664, "learning_rate": 3.4197665291765046e-06, "loss": 2.8069, "step": 3102 }, { "epoch": 1.777038150454513, "grad_norm": 0.34964680671691895, "learning_rate": 3.3854455393283967e-06, "loss": 2.7989, "step": 3104 }, { "epoch": 1.778183379858278, "grad_norm": 0.3345055878162384, "learning_rate": 3.35129160723488e-06, "loss": 2.7256, "step": 3106 }, { "epoch": 1.7793286092620428, "grad_norm": 0.6176712512969971, "learning_rate": 3.317304855297199e-06, "loss": 2.8056, "step": 3108 }, { "epoch": 1.7804738386658077, "grad_norm": 0.34730008244514465, "learning_rate": 3.283485405317449e-06, "loss": 2.7684, "step": 3110 }, { "epoch": 1.7816190680695727, "grad_norm": 0.3355562388896942, "learning_rate": 3.2498333784981515e-06, "loss": 2.8256, "step": 3112 }, { "epoch": 1.7827642974733378, "grad_norm": 0.33414214849472046, "learning_rate": 3.216348895441812e-06, "loss": 2.7991, "step": 3114 }, { "epoch": 1.7839095268771026, "grad_norm": 0.3385785222053528, "learning_rate": 3.1830320761504883e-06, "loss": 2.8262, "step": 3116 }, { "epoch": 1.7850547562808674, "grad_norm": 0.3246150612831116, "learning_rate": 3.1498830400253644e-06, "loss": 2.8279, "step": 3118 }, { "epoch": 1.7861999856846325, "grad_norm": 0.3460950553417206, "learning_rate": 3.1169019058663205e-06, "loss": 2.8359, "step": 3120 }, { "epoch": 1.7873452150883975, "grad_norm": 0.34007689356803894, "learning_rate": 3.0840887918715142e-06, "loss": 2.7683, "step": 3122 }, { "epoch": 1.7884904444921623, "grad_norm": 0.32850441336631775, "learning_rate": 3.0514438156369395e-06, "loss": 2.7411, "step": 3124 }, { "epoch": 1.7896356738959271, "grad_norm": 0.34231916069984436, "learning_rate": 3.0189670941560223e-06, "loss": 2.7701, "step": 3126 }, { "epoch": 1.7907809032996922, "grad_norm": 0.3406977951526642, "learning_rate": 2.9866587438192038e-06, "loss": 2.8321, "step": 3128 }, { "epoch": 1.7919261327034572, "grad_norm": 0.33753177523612976, "learning_rate": 2.9545188804134873e-06, "loss": 2.8345, "step": 3130 }, { "epoch": 1.793071362107222, "grad_norm": 0.3528638482093811, "learning_rate": 2.92254761912209e-06, "loss": 2.8028, "step": 3132 }, { "epoch": 1.794216591510987, "grad_norm": 0.3453840911388397, "learning_rate": 2.8907450745239707e-06, "loss": 2.7796, "step": 3134 }, { "epoch": 1.795361820914752, "grad_norm": 0.332639217376709, "learning_rate": 2.859111360593425e-06, "loss": 2.7839, "step": 3136 }, { "epoch": 1.796507050318517, "grad_norm": 0.3303762972354889, "learning_rate": 2.8276465906997175e-06, "loss": 2.7572, "step": 3138 }, { "epoch": 1.797652279722282, "grad_norm": 0.32917872071266174, "learning_rate": 2.7963508776066294e-06, "loss": 2.7965, "step": 3140 }, { "epoch": 1.7987975091260469, "grad_norm": 0.34089693427085876, "learning_rate": 2.7652243334720886e-06, "loss": 2.8239, "step": 3142 }, { "epoch": 1.7999427385298117, "grad_norm": 0.3425874412059784, "learning_rate": 2.7342670698477513e-06, "loss": 2.8552, "step": 3144 }, { "epoch": 1.8010879679335767, "grad_norm": 0.33585497736930847, "learning_rate": 2.7034791976785935e-06, "loss": 2.7841, "step": 3146 }, { "epoch": 1.8022331973373418, "grad_norm": 0.35143813490867615, "learning_rate": 2.672860827302531e-06, "loss": 2.8617, "step": 3148 }, { "epoch": 1.8033784267411066, "grad_norm": 0.333116352558136, "learning_rate": 2.642412068450012e-06, "loss": 2.7856, "step": 3150 }, { "epoch": 1.8045236561448714, "grad_norm": 0.3300241529941559, "learning_rate": 2.6121330302436466e-06, "loss": 2.7256, "step": 3152 }, { "epoch": 1.8056688855486365, "grad_norm": 0.3349437117576599, "learning_rate": 2.5820238211977876e-06, "loss": 2.8157, "step": 3154 }, { "epoch": 1.8068141149524015, "grad_norm": 0.3350974917411804, "learning_rate": 2.5520845492181367e-06, "loss": 2.7961, "step": 3156 }, { "epoch": 1.8079593443561663, "grad_norm": 0.3298918604850769, "learning_rate": 2.5223153216013984e-06, "loss": 2.7446, "step": 3158 }, { "epoch": 1.8091045737599312, "grad_norm": 0.3455289602279663, "learning_rate": 2.4927162450348485e-06, "loss": 2.8688, "step": 3160 }, { "epoch": 1.8102498031636962, "grad_norm": 0.3411054015159607, "learning_rate": 2.4632874255959946e-06, "loss": 2.7303, "step": 3162 }, { "epoch": 1.8113950325674613, "grad_norm": 0.3444480001926422, "learning_rate": 2.4340289687521665e-06, "loss": 2.7764, "step": 3164 }, { "epoch": 1.812540261971226, "grad_norm": 0.348910391330719, "learning_rate": 2.404940979360132e-06, "loss": 2.8186, "step": 3166 }, { "epoch": 1.813685491374991, "grad_norm": 0.33785849809646606, "learning_rate": 2.3760235616657633e-06, "loss": 2.7404, "step": 3168 }, { "epoch": 1.814830720778756, "grad_norm": 0.3371528089046478, "learning_rate": 2.3472768193036065e-06, "loss": 2.7917, "step": 3170 }, { "epoch": 1.815975950182521, "grad_norm": 0.32835063338279724, "learning_rate": 2.318700855296563e-06, "loss": 2.7773, "step": 3172 }, { "epoch": 1.8171211795862858, "grad_norm": 0.3315414488315582, "learning_rate": 2.290295772055484e-06, "loss": 2.7966, "step": 3174 }, { "epoch": 1.8182664089900507, "grad_norm": 0.31980353593826294, "learning_rate": 2.2620616713788235e-06, "loss": 2.787, "step": 3176 }, { "epoch": 1.8194116383938157, "grad_norm": 0.33509254455566406, "learning_rate": 2.2339986544522573e-06, "loss": 2.7871, "step": 3178 }, { "epoch": 1.8205568677975807, "grad_norm": 0.329830139875412, "learning_rate": 2.206106821848336e-06, "loss": 2.7573, "step": 3180 }, { "epoch": 1.8217020972013458, "grad_norm": 0.3474451005458832, "learning_rate": 2.178386273526123e-06, "loss": 2.8144, "step": 3182 }, { "epoch": 1.8228473266051106, "grad_norm": 0.33576250076293945, "learning_rate": 2.1508371088308277e-06, "loss": 2.7399, "step": 3184 }, { "epoch": 1.8239925560088754, "grad_norm": 0.3393871486186981, "learning_rate": 2.1234594264934404e-06, "loss": 2.787, "step": 3186 }, { "epoch": 1.8251377854126405, "grad_norm": 0.3249988257884979, "learning_rate": 2.0962533246304096e-06, "loss": 2.7355, "step": 3188 }, { "epoch": 1.8262830148164055, "grad_norm": 0.32753321528434753, "learning_rate": 2.0692189007432528e-06, "loss": 2.806, "step": 3190 }, { "epoch": 1.8274282442201704, "grad_norm": 0.3471840023994446, "learning_rate": 2.042356251718247e-06, "loss": 2.7807, "step": 3192 }, { "epoch": 1.8285734736239352, "grad_norm": 0.32751500606536865, "learning_rate": 2.0156654738260448e-06, "loss": 2.783, "step": 3194 }, { "epoch": 1.8297187030277002, "grad_norm": 0.3399296700954437, "learning_rate": 1.9891466627213585e-06, "loss": 2.8289, "step": 3196 }, { "epoch": 1.8308639324314653, "grad_norm": 0.3380739688873291, "learning_rate": 1.962799913442581e-06, "loss": 2.7854, "step": 3198 }, { "epoch": 1.83200916183523, "grad_norm": 0.329171746969223, "learning_rate": 1.9366253204114947e-06, "loss": 2.7884, "step": 3200 }, { "epoch": 1.833154391238995, "grad_norm": 0.34631481766700745, "learning_rate": 1.910622977432891e-06, "loss": 2.7858, "step": 3202 }, { "epoch": 1.83429962064276, "grad_norm": 0.3457001745700836, "learning_rate": 1.8847929776942608e-06, "loss": 2.7947, "step": 3204 }, { "epoch": 1.835444850046525, "grad_norm": 0.33301305770874023, "learning_rate": 1.8591354137654283e-06, "loss": 2.7127, "step": 3206 }, { "epoch": 1.8365900794502898, "grad_norm": 0.3279321789741516, "learning_rate": 1.8336503775982672e-06, "loss": 2.7959, "step": 3208 }, { "epoch": 1.8377353088540547, "grad_norm": 0.330035924911499, "learning_rate": 1.8083379605263185e-06, "loss": 2.792, "step": 3210 }, { "epoch": 1.8388805382578197, "grad_norm": 0.3374471962451935, "learning_rate": 1.783198253264512e-06, "loss": 2.7966, "step": 3212 }, { "epoch": 1.8400257676615848, "grad_norm": 0.32652756571769714, "learning_rate": 1.7582313459088118e-06, "loss": 2.8046, "step": 3214 }, { "epoch": 1.8411709970653498, "grad_norm": 0.3380284905433655, "learning_rate": 1.7334373279358829e-06, "loss": 2.8157, "step": 3216 }, { "epoch": 1.8423162264691146, "grad_norm": 0.3361000418663025, "learning_rate": 1.7088162882028081e-06, "loss": 2.8368, "step": 3218 }, { "epoch": 1.8434614558728795, "grad_norm": 0.3236636519432068, "learning_rate": 1.6843683149467437e-06, "loss": 2.7995, "step": 3220 }, { "epoch": 1.8446066852766445, "grad_norm": 0.3316330909729004, "learning_rate": 1.6600934957845982e-06, "loss": 2.6964, "step": 3222 }, { "epoch": 1.8457519146804096, "grad_norm": 0.3344908654689789, "learning_rate": 1.6359919177127425e-06, "loss": 2.7659, "step": 3224 }, { "epoch": 1.8468971440841744, "grad_norm": 0.327034056186676, "learning_rate": 1.6120636671066724e-06, "loss": 2.7614, "step": 3226 }, { "epoch": 1.8480423734879392, "grad_norm": 0.33758437633514404, "learning_rate": 1.5883088297207194e-06, "loss": 2.7993, "step": 3228 }, { "epoch": 1.8491876028917043, "grad_norm": 0.3256714344024658, "learning_rate": 1.564727490687734e-06, "loss": 2.7244, "step": 3230 }, { "epoch": 1.8503328322954693, "grad_norm": 0.3244673013687134, "learning_rate": 1.5413197345187814e-06, "loss": 2.7569, "step": 3232 }, { "epoch": 1.8514780616992341, "grad_norm": 0.3476504981517792, "learning_rate": 1.5180856451028292e-06, "loss": 2.8114, "step": 3234 }, { "epoch": 1.852623291102999, "grad_norm": 0.33813002705574036, "learning_rate": 1.4950253057064656e-06, "loss": 2.7843, "step": 3236 }, { "epoch": 1.853768520506764, "grad_norm": 0.3342847526073456, "learning_rate": 1.472138798973588e-06, "loss": 2.7963, "step": 3238 }, { "epoch": 1.854913749910529, "grad_norm": 0.33507832884788513, "learning_rate": 1.4494262069251197e-06, "loss": 2.7728, "step": 3240 }, { "epoch": 1.8560589793142939, "grad_norm": 0.32828494906425476, "learning_rate": 1.4268876109586937e-06, "loss": 2.7177, "step": 3242 }, { "epoch": 1.8572042087180587, "grad_norm": 0.3284968435764313, "learning_rate": 1.4045230918483754e-06, "loss": 2.8073, "step": 3244 }, { "epoch": 1.8583494381218237, "grad_norm": 0.32281050086021423, "learning_rate": 1.3823327297443845e-06, "loss": 2.827, "step": 3246 }, { "epoch": 1.8594946675255888, "grad_norm": 0.33534350991249084, "learning_rate": 1.3603166041727678e-06, "loss": 2.7951, "step": 3248 }, { "epoch": 1.8606398969293536, "grad_norm": 0.3310191035270691, "learning_rate": 1.338474794035177e-06, "loss": 2.7419, "step": 3250 }, { "epoch": 1.8617851263331187, "grad_norm": 0.31621959805488586, "learning_rate": 1.3168073776085254e-06, "loss": 2.7219, "step": 3252 }, { "epoch": 1.8629303557368835, "grad_norm": 0.3386898636817932, "learning_rate": 1.2953144325447419e-06, "loss": 2.7494, "step": 3254 }, { "epoch": 1.8640755851406485, "grad_norm": 0.3290254473686218, "learning_rate": 1.2739960358704783e-06, "loss": 2.8016, "step": 3256 }, { "epoch": 1.8652208145444136, "grad_norm": 0.331177681684494, "learning_rate": 1.2528522639868311e-06, "loss": 2.8127, "step": 3258 }, { "epoch": 1.8663660439481784, "grad_norm": 0.32540303468704224, "learning_rate": 1.2318831926690976e-06, "loss": 2.7984, "step": 3260 }, { "epoch": 1.8675112733519432, "grad_norm": 0.3210640847682953, "learning_rate": 1.2110888970664648e-06, "loss": 2.8064, "step": 3262 }, { "epoch": 1.8686565027557083, "grad_norm": 0.3300011456012726, "learning_rate": 1.1904694517017479e-06, "loss": 2.7937, "step": 3264 }, { "epoch": 1.8698017321594733, "grad_norm": 0.3293037414550781, "learning_rate": 1.170024930471153e-06, "loss": 2.7709, "step": 3266 }, { "epoch": 1.8709469615632381, "grad_norm": 0.3471381366252899, "learning_rate": 1.1497554066439765e-06, "loss": 2.7584, "step": 3268 }, { "epoch": 1.872092190967003, "grad_norm": 0.33096760511398315, "learning_rate": 1.1296609528623602e-06, "loss": 2.7652, "step": 3270 }, { "epoch": 1.873237420370768, "grad_norm": 0.3322175443172455, "learning_rate": 1.1097416411410377e-06, "loss": 2.8076, "step": 3272 }, { "epoch": 1.874382649774533, "grad_norm": 0.3291280269622803, "learning_rate": 1.089997542867044e-06, "loss": 2.6794, "step": 3274 }, { "epoch": 1.8755278791782979, "grad_norm": 0.3335420489311218, "learning_rate": 1.0704287287995108e-06, "loss": 2.789, "step": 3276 }, { "epoch": 1.8766731085820627, "grad_norm": 0.32893529534339905, "learning_rate": 1.0510352690693558e-06, "loss": 2.7967, "step": 3278 }, { "epoch": 1.8778183379858278, "grad_norm": 0.33461055159568787, "learning_rate": 1.0318172331790833e-06, "loss": 2.7886, "step": 3280 }, { "epoch": 1.8789635673895928, "grad_norm": 0.3381837010383606, "learning_rate": 1.0127746900025048e-06, "loss": 2.7707, "step": 3282 }, { "epoch": 1.8801087967933576, "grad_norm": 0.33486098051071167, "learning_rate": 9.939077077844917e-07, "loss": 2.8634, "step": 3284 }, { "epoch": 1.8812540261971225, "grad_norm": 0.3287809193134308, "learning_rate": 9.7521635414074e-07, "loss": 2.7943, "step": 3286 }, { "epoch": 1.8823992556008875, "grad_norm": 0.3270516097545624, "learning_rate": 9.56700696057533e-07, "loss": 2.764, "step": 3288 }, { "epoch": 1.8835444850046525, "grad_norm": 0.3284582793712616, "learning_rate": 9.383607998914911e-07, "loss": 2.7701, "step": 3290 }, { "epoch": 1.8846897144084176, "grad_norm": 0.3257734179496765, "learning_rate": 9.201967313693382e-07, "loss": 2.8537, "step": 3292 }, { "epoch": 1.8858349438121824, "grad_norm": 0.3266497552394867, "learning_rate": 9.02208555587658e-07, "loss": 2.743, "step": 3294 }, { "epoch": 1.8869801732159472, "grad_norm": 0.3312336206436157, "learning_rate": 8.843963370126828e-07, "loss": 2.815, "step": 3296 }, { "epoch": 1.8881254026197123, "grad_norm": 0.32541465759277344, "learning_rate": 8.667601394800218e-07, "loss": 2.8161, "step": 3298 }, { "epoch": 1.8892706320234773, "grad_norm": 0.3300268352031708, "learning_rate": 8.493000261944884e-07, "loss": 2.7923, "step": 3300 } ], "logging_steps": 2, "max_steps": 3494, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.714601713513005e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }