{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9838585703305149, "eval_steps": 500, "global_step": 12000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.198821419420958e-05, "grad_norm": 3.1852145195007324, "learning_rate": 0.0, "loss": 12.119, "step": 1 }, { "epoch": 0.00016397642838841917, "grad_norm": 3.045940399169922, "learning_rate": 1.639344262295082e-07, "loss": 12.1143, "step": 2 }, { "epoch": 0.00032795285677683834, "grad_norm": 3.0180699825286865, "learning_rate": 4.918032786885246e-07, "loss": 12.1146, "step": 4 }, { "epoch": 0.0004919292851652575, "grad_norm": 2.994316816329956, "learning_rate": 8.19672131147541e-07, "loss": 12.1185, "step": 6 }, { "epoch": 0.0006559057135536767, "grad_norm": 3.1152169704437256, "learning_rate": 1.1475409836065575e-06, "loss": 12.1106, "step": 8 }, { "epoch": 0.0008198821419420958, "grad_norm": 3.100684881210327, "learning_rate": 1.4754098360655739e-06, "loss": 12.1086, "step": 10 }, { "epoch": 0.000983858570330515, "grad_norm": 2.950772762298584, "learning_rate": 1.8032786885245903e-06, "loss": 12.102, "step": 12 }, { "epoch": 0.001147834998718934, "grad_norm": 2.897174835205078, "learning_rate": 2.1311475409836067e-06, "loss": 12.0903, "step": 14 }, { "epoch": 0.0013118114271073534, "grad_norm": 3.0111382007598877, "learning_rate": 2.459016393442623e-06, "loss": 12.0613, "step": 16 }, { "epoch": 0.0014757878554957724, "grad_norm": 3.11842942237854, "learning_rate": 2.7868852459016396e-06, "loss": 12.0268, "step": 18 }, { "epoch": 0.0016397642838841917, "grad_norm": 3.0497119426727295, "learning_rate": 3.114754098360656e-06, "loss": 12.0057, "step": 20 }, { "epoch": 0.0018037407122726108, "grad_norm": 3.685875177383423, "learning_rate": 3.4426229508196724e-06, "loss": 11.905, "step": 22 }, { "epoch": 0.00196771714066103, "grad_norm": 3.960944175720215, "learning_rate": 3.770491803278689e-06, "loss": 11.8163, "step": 24 }, { "epoch": 0.002131693569049449, "grad_norm": 4.291749000549316, "learning_rate": 4.098360655737704e-06, "loss": 11.7568, "step": 26 }, { "epoch": 0.002295669997437868, "grad_norm": 4.350489616394043, "learning_rate": 4.426229508196722e-06, "loss": 11.6989, "step": 28 }, { "epoch": 0.0024596464258262877, "grad_norm": 4.357988357543945, "learning_rate": 4.754098360655738e-06, "loss": 11.4718, "step": 30 }, { "epoch": 0.0026236228542147067, "grad_norm": 3.87373423576355, "learning_rate": 5.0819672131147545e-06, "loss": 11.2832, "step": 32 }, { "epoch": 0.0027875992826031258, "grad_norm": 3.546539306640625, "learning_rate": 5.409836065573771e-06, "loss": 11.1534, "step": 34 }, { "epoch": 0.002951575710991545, "grad_norm": 3.165611743927002, "learning_rate": 5.737704918032787e-06, "loss": 11.0611, "step": 36 }, { "epoch": 0.0031155521393799643, "grad_norm": 2.898167848587036, "learning_rate": 6.065573770491804e-06, "loss": 10.9593, "step": 38 }, { "epoch": 0.0032795285677683834, "grad_norm": 2.756183385848999, "learning_rate": 6.393442622950819e-06, "loss": 10.8399, "step": 40 }, { "epoch": 0.0034435049961568025, "grad_norm": 2.5953481197357178, "learning_rate": 6.721311475409836e-06, "loss": 10.7619, "step": 42 }, { "epoch": 0.0036074814245452215, "grad_norm": 2.4934237003326416, "learning_rate": 7.049180327868852e-06, "loss": 10.6836, "step": 44 }, { "epoch": 0.0037714578529336406, "grad_norm": 2.4235870838165283, "learning_rate": 7.3770491803278695e-06, "loss": 10.6374, "step": 46 }, { "epoch": 0.00393543428132206, "grad_norm": 2.4326910972595215, "learning_rate": 7.704918032786886e-06, "loss": 10.5798, "step": 48 }, { "epoch": 0.004099410709710479, "grad_norm": 2.3508763313293457, "learning_rate": 8.032786885245902e-06, "loss": 10.5475, "step": 50 }, { "epoch": 0.004263387138098898, "grad_norm": 2.411895513534546, "learning_rate": 8.360655737704919e-06, "loss": 10.4924, "step": 52 }, { "epoch": 0.004427363566487317, "grad_norm": 2.3643436431884766, "learning_rate": 8.688524590163935e-06, "loss": 10.4639, "step": 54 }, { "epoch": 0.004591339994875736, "grad_norm": 2.3757126331329346, "learning_rate": 9.016393442622952e-06, "loss": 10.4338, "step": 56 }, { "epoch": 0.004755316423264155, "grad_norm": 2.3393900394439697, "learning_rate": 9.344262295081968e-06, "loss": 10.3862, "step": 58 }, { "epoch": 0.004919292851652575, "grad_norm": 2.3030688762664795, "learning_rate": 9.672131147540984e-06, "loss": 10.3828, "step": 60 }, { "epoch": 0.005083269280040994, "grad_norm": 2.290266275405884, "learning_rate": 1e-05, "loss": 10.3545, "step": 62 }, { "epoch": 0.005247245708429413, "grad_norm": 2.3040578365325928, "learning_rate": 1.0327868852459017e-05, "loss": 10.3075, "step": 64 }, { "epoch": 0.0054112221368178325, "grad_norm": 2.2848293781280518, "learning_rate": 1.0655737704918032e-05, "loss": 10.2883, "step": 66 }, { "epoch": 0.0055751985652062516, "grad_norm": 2.261911630630493, "learning_rate": 1.0983606557377048e-05, "loss": 10.2328, "step": 68 }, { "epoch": 0.005739174993594671, "grad_norm": 2.2474453449249268, "learning_rate": 1.1311475409836065e-05, "loss": 10.2065, "step": 70 }, { "epoch": 0.00590315142198309, "grad_norm": 2.2612874507904053, "learning_rate": 1.1639344262295083e-05, "loss": 10.1581, "step": 72 }, { "epoch": 0.006067127850371509, "grad_norm": 2.293903112411499, "learning_rate": 1.19672131147541e-05, "loss": 10.0998, "step": 74 }, { "epoch": 0.006231104278759929, "grad_norm": 2.2207071781158447, "learning_rate": 1.2295081967213116e-05, "loss": 10.087, "step": 76 }, { "epoch": 0.006395080707148348, "grad_norm": 2.2724802494049072, "learning_rate": 1.2622950819672132e-05, "loss": 10.006, "step": 78 }, { "epoch": 0.006559057135536767, "grad_norm": 2.18399977684021, "learning_rate": 1.2950819672131149e-05, "loss": 9.9685, "step": 80 }, { "epoch": 0.006723033563925186, "grad_norm": 2.2227847576141357, "learning_rate": 1.3278688524590163e-05, "loss": 9.8784, "step": 82 }, { "epoch": 0.006887009992313605, "grad_norm": 2.275341033935547, "learning_rate": 1.3606557377049181e-05, "loss": 9.7873, "step": 84 }, { "epoch": 0.007050986420702024, "grad_norm": 2.170790195465088, "learning_rate": 1.3934426229508196e-05, "loss": 9.758, "step": 86 }, { "epoch": 0.007214962849090443, "grad_norm": 2.3812711238861084, "learning_rate": 1.4262295081967214e-05, "loss": 9.6872, "step": 88 }, { "epoch": 0.007378939277478862, "grad_norm": 2.2956268787384033, "learning_rate": 1.4590163934426229e-05, "loss": 9.6342, "step": 90 }, { "epoch": 0.007542915705867281, "grad_norm": 4.3424859046936035, "learning_rate": 1.4918032786885247e-05, "loss": 9.599, "step": 92 }, { "epoch": 0.007706892134255701, "grad_norm": 2.303790807723999, "learning_rate": 1.5245901639344262e-05, "loss": 9.4832, "step": 94 }, { "epoch": 0.00787086856264412, "grad_norm": 2.4979960918426514, "learning_rate": 1.557377049180328e-05, "loss": 9.4503, "step": 96 }, { "epoch": 0.008034844991032538, "grad_norm": 1.9802457094192505, "learning_rate": 1.5901639344262295e-05, "loss": 9.3706, "step": 98 }, { "epoch": 0.008198821419420958, "grad_norm": 2.263692855834961, "learning_rate": 1.6229508196721314e-05, "loss": 9.3359, "step": 100 }, { "epoch": 0.008362797847809378, "grad_norm": 2.014167547225952, "learning_rate": 1.6557377049180328e-05, "loss": 9.2529, "step": 102 }, { "epoch": 0.008526774276197796, "grad_norm": 2.3031680583953857, "learning_rate": 1.6885245901639347e-05, "loss": 9.1999, "step": 104 }, { "epoch": 0.008690750704586216, "grad_norm": 1.8865060806274414, "learning_rate": 1.721311475409836e-05, "loss": 9.2385, "step": 106 }, { "epoch": 0.008854727132974635, "grad_norm": 1.783657431602478, "learning_rate": 1.754098360655738e-05, "loss": 9.1613, "step": 108 }, { "epoch": 0.009018703561363054, "grad_norm": 1.7754466533660889, "learning_rate": 1.7868852459016393e-05, "loss": 9.0875, "step": 110 }, { "epoch": 0.009182679989751473, "grad_norm": 2.1157960891723633, "learning_rate": 1.8196721311475413e-05, "loss": 9.0315, "step": 112 }, { "epoch": 0.009346656418139893, "grad_norm": 2.1570520401000977, "learning_rate": 1.8524590163934426e-05, "loss": 8.988, "step": 114 }, { "epoch": 0.00951063284652831, "grad_norm": 2.070383071899414, "learning_rate": 1.8852459016393442e-05, "loss": 8.9084, "step": 116 }, { "epoch": 0.00967460927491673, "grad_norm": 2.004547119140625, "learning_rate": 1.918032786885246e-05, "loss": 8.8827, "step": 118 }, { "epoch": 0.00983858570330515, "grad_norm": 2.282608985900879, "learning_rate": 1.9508196721311475e-05, "loss": 8.8536, "step": 120 }, { "epoch": 0.010002562131693569, "grad_norm": 1.872442364692688, "learning_rate": 1.9836065573770492e-05, "loss": 8.8046, "step": 122 }, { "epoch": 0.010166538560081989, "grad_norm": 2.8452837467193604, "learning_rate": 2.0163934426229508e-05, "loss": 8.7791, "step": 124 }, { "epoch": 0.010330514988470407, "grad_norm": 2.155548095703125, "learning_rate": 2.0491803278688525e-05, "loss": 8.7161, "step": 126 }, { "epoch": 0.010494491416858827, "grad_norm": 2.887465238571167, "learning_rate": 2.081967213114754e-05, "loss": 8.646, "step": 128 }, { "epoch": 0.010658467845247245, "grad_norm": 1.7281243801116943, "learning_rate": 2.114754098360656e-05, "loss": 8.6342, "step": 130 }, { "epoch": 0.010822444273635665, "grad_norm": 2.309556484222412, "learning_rate": 2.1475409836065574e-05, "loss": 8.6055, "step": 132 }, { "epoch": 0.010986420702024083, "grad_norm": 2.6733663082122803, "learning_rate": 2.1803278688524594e-05, "loss": 8.5734, "step": 134 }, { "epoch": 0.011150397130412503, "grad_norm": 1.8595812320709229, "learning_rate": 2.2131147540983607e-05, "loss": 8.5053, "step": 136 }, { "epoch": 0.011314373558800923, "grad_norm": 2.128081798553467, "learning_rate": 2.2459016393442626e-05, "loss": 8.5065, "step": 138 }, { "epoch": 0.011478349987189341, "grad_norm": 2.7606353759765625, "learning_rate": 2.278688524590164e-05, "loss": 8.4438, "step": 140 }, { "epoch": 0.011642326415577761, "grad_norm": 4.279053688049316, "learning_rate": 2.311475409836066e-05, "loss": 8.3871, "step": 142 }, { "epoch": 0.01180630284396618, "grad_norm": 2.8464019298553467, "learning_rate": 2.3442622950819672e-05, "loss": 8.3569, "step": 144 }, { "epoch": 0.0119702792723546, "grad_norm": 1.880401611328125, "learning_rate": 2.377049180327869e-05, "loss": 8.3394, "step": 146 }, { "epoch": 0.012134255700743017, "grad_norm": 1.8325446844100952, "learning_rate": 2.4098360655737705e-05, "loss": 8.2505, "step": 148 }, { "epoch": 0.012298232129131437, "grad_norm": 2.5180068016052246, "learning_rate": 2.442622950819672e-05, "loss": 8.2408, "step": 150 }, { "epoch": 0.012462208557519857, "grad_norm": 1.706740379333496, "learning_rate": 2.4754098360655738e-05, "loss": 8.2324, "step": 152 }, { "epoch": 0.012626184985908276, "grad_norm": 1.7287302017211914, "learning_rate": 2.5081967213114754e-05, "loss": 8.2029, "step": 154 }, { "epoch": 0.012790161414296695, "grad_norm": 2.2318031787872314, "learning_rate": 2.540983606557377e-05, "loss": 8.1472, "step": 156 }, { "epoch": 0.012954137842685114, "grad_norm": 2.371234655380249, "learning_rate": 2.573770491803279e-05, "loss": 8.0911, "step": 158 }, { "epoch": 0.013118114271073534, "grad_norm": 2.0684690475463867, "learning_rate": 2.6065573770491804e-05, "loss": 8.0566, "step": 160 }, { "epoch": 0.013282090699461952, "grad_norm": 2.811659097671509, "learning_rate": 2.639344262295082e-05, "loss": 8.0289, "step": 162 }, { "epoch": 0.013446067127850372, "grad_norm": 1.9651939868927002, "learning_rate": 2.6721311475409837e-05, "loss": 7.9698, "step": 164 }, { "epoch": 0.01361004355623879, "grad_norm": 2.164975166320801, "learning_rate": 2.7049180327868856e-05, "loss": 7.9336, "step": 166 }, { "epoch": 0.01377401998462721, "grad_norm": 1.353088140487671, "learning_rate": 2.737704918032787e-05, "loss": 7.9496, "step": 168 }, { "epoch": 0.01393799641301563, "grad_norm": 1.452012538909912, "learning_rate": 2.7704918032786886e-05, "loss": 7.8609, "step": 170 }, { "epoch": 0.014101972841404048, "grad_norm": 2.3023464679718018, "learning_rate": 2.8032786885245906e-05, "loss": 7.8505, "step": 172 }, { "epoch": 0.014265949269792468, "grad_norm": 4.203457832336426, "learning_rate": 2.8360655737704922e-05, "loss": 7.8213, "step": 174 }, { "epoch": 0.014429925698180886, "grad_norm": 3.246518135070801, "learning_rate": 2.8688524590163935e-05, "loss": 7.7589, "step": 176 }, { "epoch": 0.014593902126569306, "grad_norm": 2.139510154724121, "learning_rate": 2.901639344262295e-05, "loss": 7.8026, "step": 178 }, { "epoch": 0.014757878554957724, "grad_norm": 1.7982274293899536, "learning_rate": 2.934426229508197e-05, "loss": 7.7061, "step": 180 }, { "epoch": 0.014921854983346144, "grad_norm": 2.1394877433776855, "learning_rate": 2.967213114754098e-05, "loss": 7.6849, "step": 182 }, { "epoch": 0.015085831411734562, "grad_norm": 1.9051532745361328, "learning_rate": 3e-05, "loss": 7.6823, "step": 184 }, { "epoch": 0.015249807840122982, "grad_norm": 2.402742862701416, "learning_rate": 3.0327868852459017e-05, "loss": 7.6698, "step": 186 }, { "epoch": 0.015413784268511402, "grad_norm": 2.6151533126831055, "learning_rate": 3.065573770491804e-05, "loss": 7.5872, "step": 188 }, { "epoch": 0.01557776069689982, "grad_norm": 1.7072643041610718, "learning_rate": 3.098360655737705e-05, "loss": 7.5527, "step": 190 }, { "epoch": 0.01574173712528824, "grad_norm": 1.5905566215515137, "learning_rate": 3.131147540983606e-05, "loss": 7.5412, "step": 192 }, { "epoch": 0.01590571355367666, "grad_norm": 2.3824453353881836, "learning_rate": 3.163934426229508e-05, "loss": 7.5453, "step": 194 }, { "epoch": 0.016069689982065077, "grad_norm": 1.4948384761810303, "learning_rate": 3.19672131147541e-05, "loss": 7.4671, "step": 196 }, { "epoch": 0.0162336664104535, "grad_norm": 2.2609403133392334, "learning_rate": 3.2295081967213116e-05, "loss": 7.4387, "step": 198 }, { "epoch": 0.016397642838841917, "grad_norm": 1.8224328756332397, "learning_rate": 3.2622950819672136e-05, "loss": 7.3725, "step": 200 }, { "epoch": 0.016561619267230335, "grad_norm": 1.9965217113494873, "learning_rate": 3.295081967213115e-05, "loss": 7.3654, "step": 202 }, { "epoch": 0.016725595695618756, "grad_norm": 1.1850310564041138, "learning_rate": 3.327868852459017e-05, "loss": 7.3385, "step": 204 }, { "epoch": 0.016889572124007175, "grad_norm": 1.642038106918335, "learning_rate": 3.360655737704918e-05, "loss": 7.3, "step": 206 }, { "epoch": 0.017053548552395593, "grad_norm": 1.9536902904510498, "learning_rate": 3.39344262295082e-05, "loss": 7.273, "step": 208 }, { "epoch": 0.01721752498078401, "grad_norm": 1.829714059829712, "learning_rate": 3.4262295081967214e-05, "loss": 7.2287, "step": 210 }, { "epoch": 0.017381501409172433, "grad_norm": 2.4997904300689697, "learning_rate": 3.459016393442623e-05, "loss": 7.2496, "step": 212 }, { "epoch": 0.01754547783756085, "grad_norm": 3.472687244415283, "learning_rate": 3.491803278688525e-05, "loss": 7.1957, "step": 214 }, { "epoch": 0.01770945426594927, "grad_norm": 3.043635845184326, "learning_rate": 3.524590163934427e-05, "loss": 7.1526, "step": 216 }, { "epoch": 0.01787343069433769, "grad_norm": 3.5498316287994385, "learning_rate": 3.557377049180328e-05, "loss": 7.1433, "step": 218 }, { "epoch": 0.01803740712272611, "grad_norm": 2.9172403812408447, "learning_rate": 3.590163934426229e-05, "loss": 7.1423, "step": 220 }, { "epoch": 0.018201383551114527, "grad_norm": 2.57663893699646, "learning_rate": 3.622950819672131e-05, "loss": 7.0919, "step": 222 }, { "epoch": 0.018365359979502945, "grad_norm": 1.6703250408172607, "learning_rate": 3.655737704918033e-05, "loss": 7.061, "step": 224 }, { "epoch": 0.018529336407891367, "grad_norm": 1.2947953939437866, "learning_rate": 3.6885245901639346e-05, "loss": 7.0452, "step": 226 }, { "epoch": 0.018693312836279785, "grad_norm": 1.5165050029754639, "learning_rate": 3.721311475409836e-05, "loss": 7.0161, "step": 228 }, { "epoch": 0.018857289264668203, "grad_norm": 2.0093023777008057, "learning_rate": 3.754098360655738e-05, "loss": 7.0072, "step": 230 }, { "epoch": 0.01902126569305662, "grad_norm": 2.3759045600891113, "learning_rate": 3.78688524590164e-05, "loss": 6.9464, "step": 232 }, { "epoch": 0.019185242121445043, "grad_norm": 2.2470510005950928, "learning_rate": 3.819672131147541e-05, "loss": 6.9827, "step": 234 }, { "epoch": 0.01934921854983346, "grad_norm": 1.9213268756866455, "learning_rate": 3.8524590163934424e-05, "loss": 6.938, "step": 236 }, { "epoch": 0.01951319497822188, "grad_norm": 1.64090895652771, "learning_rate": 3.8852459016393444e-05, "loss": 6.8792, "step": 238 }, { "epoch": 0.0196771714066103, "grad_norm": 1.7019102573394775, "learning_rate": 3.9180327868852464e-05, "loss": 6.9123, "step": 240 }, { "epoch": 0.01984114783499872, "grad_norm": 1.2651671171188354, "learning_rate": 3.950819672131148e-05, "loss": 6.8878, "step": 242 }, { "epoch": 0.020005124263387138, "grad_norm": 1.6623637676239014, "learning_rate": 3.983606557377049e-05, "loss": 6.8734, "step": 244 }, { "epoch": 0.020169100691775556, "grad_norm": 2.097914695739746, "learning_rate": 4.016393442622951e-05, "loss": 6.8284, "step": 246 }, { "epoch": 0.020333077120163977, "grad_norm": 1.9533648490905762, "learning_rate": 4.049180327868853e-05, "loss": 6.8234, "step": 248 }, { "epoch": 0.020497053548552396, "grad_norm": 3.1760356426239014, "learning_rate": 4.081967213114754e-05, "loss": 6.8269, "step": 250 }, { "epoch": 0.020661029976940814, "grad_norm": 3.1443092823028564, "learning_rate": 4.1147540983606556e-05, "loss": 6.8064, "step": 252 }, { "epoch": 0.020825006405329236, "grad_norm": 2.328242301940918, "learning_rate": 4.1475409836065575e-05, "loss": 6.7787, "step": 254 }, { "epoch": 0.020988982833717654, "grad_norm": 1.6525546312332153, "learning_rate": 4.1803278688524595e-05, "loss": 6.7703, "step": 256 }, { "epoch": 0.021152959262106072, "grad_norm": 1.2344805002212524, "learning_rate": 4.213114754098361e-05, "loss": 6.7074, "step": 258 }, { "epoch": 0.02131693569049449, "grad_norm": 1.625827670097351, "learning_rate": 4.245901639344262e-05, "loss": 6.7281, "step": 260 }, { "epoch": 0.021480912118882912, "grad_norm": 1.2819187641143799, "learning_rate": 4.278688524590164e-05, "loss": 6.7493, "step": 262 }, { "epoch": 0.02164488854727133, "grad_norm": 1.8072625398635864, "learning_rate": 4.311475409836066e-05, "loss": 6.6968, "step": 264 }, { "epoch": 0.021808864975659748, "grad_norm": 1.3799159526824951, "learning_rate": 4.3442622950819674e-05, "loss": 6.688, "step": 266 }, { "epoch": 0.021972841404048166, "grad_norm": 2.2259645462036133, "learning_rate": 4.377049180327869e-05, "loss": 6.6849, "step": 268 }, { "epoch": 0.022136817832436588, "grad_norm": 2.3046491146087646, "learning_rate": 4.409836065573771e-05, "loss": 6.6438, "step": 270 }, { "epoch": 0.022300794260825006, "grad_norm": 2.3048136234283447, "learning_rate": 4.442622950819673e-05, "loss": 6.5669, "step": 272 }, { "epoch": 0.022464770689213424, "grad_norm": 1.427413821220398, "learning_rate": 4.475409836065574e-05, "loss": 6.6016, "step": 274 }, { "epoch": 0.022628747117601846, "grad_norm": 1.9164036512374878, "learning_rate": 4.508196721311476e-05, "loss": 6.5754, "step": 276 }, { "epoch": 0.022792723545990264, "grad_norm": 2.1851377487182617, "learning_rate": 4.540983606557377e-05, "loss": 6.5257, "step": 278 }, { "epoch": 0.022956699974378682, "grad_norm": 1.622302770614624, "learning_rate": 4.5737704918032786e-05, "loss": 6.5406, "step": 280 }, { "epoch": 0.0231206764027671, "grad_norm": 1.7634437084197998, "learning_rate": 4.6065573770491805e-05, "loss": 6.5259, "step": 282 }, { "epoch": 0.023284652831155522, "grad_norm": 2.2173545360565186, "learning_rate": 4.6393442622950825e-05, "loss": 6.5383, "step": 284 }, { "epoch": 0.02344862925954394, "grad_norm": 1.620126724243164, "learning_rate": 4.672131147540984e-05, "loss": 6.5118, "step": 286 }, { "epoch": 0.02361260568793236, "grad_norm": 2.0221054553985596, "learning_rate": 4.704918032786885e-05, "loss": 6.4726, "step": 288 }, { "epoch": 0.02377658211632078, "grad_norm": 1.2866939306259155, "learning_rate": 4.737704918032787e-05, "loss": 6.4646, "step": 290 }, { "epoch": 0.0239405585447092, "grad_norm": 1.9667149782180786, "learning_rate": 4.770491803278689e-05, "loss": 6.4897, "step": 292 }, { "epoch": 0.024104534973097617, "grad_norm": 2.3963539600372314, "learning_rate": 4.8032786885245904e-05, "loss": 6.4017, "step": 294 }, { "epoch": 0.024268511401486035, "grad_norm": 2.0867695808410645, "learning_rate": 4.836065573770492e-05, "loss": 6.4881, "step": 296 }, { "epoch": 0.024432487829874457, "grad_norm": 2.1272687911987305, "learning_rate": 4.868852459016394e-05, "loss": 6.3954, "step": 298 }, { "epoch": 0.024596464258262875, "grad_norm": 1.3982235193252563, "learning_rate": 4.9016393442622957e-05, "loss": 6.4431, "step": 300 }, { "epoch": 0.024760440686651293, "grad_norm": 2.191251039505005, "learning_rate": 4.934426229508197e-05, "loss": 6.4309, "step": 302 }, { "epoch": 0.024924417115039715, "grad_norm": 1.7500178813934326, "learning_rate": 4.967213114754098e-05, "loss": 6.3676, "step": 304 }, { "epoch": 0.025088393543428133, "grad_norm": 1.4814640283584595, "learning_rate": 5e-05, "loss": 6.4043, "step": 306 }, { "epoch": 0.02525236997181655, "grad_norm": 1.549419641494751, "learning_rate": 5.0327868852459015e-05, "loss": 6.3276, "step": 308 }, { "epoch": 0.02541634640020497, "grad_norm": 1.4538007974624634, "learning_rate": 5.0655737704918035e-05, "loss": 6.2641, "step": 310 }, { "epoch": 0.02558032282859339, "grad_norm": 1.4905295372009277, "learning_rate": 5.098360655737705e-05, "loss": 6.3035, "step": 312 }, { "epoch": 0.02574429925698181, "grad_norm": 1.6055713891983032, "learning_rate": 5.131147540983606e-05, "loss": 6.3119, "step": 314 }, { "epoch": 0.025908275685370227, "grad_norm": 1.8923226594924927, "learning_rate": 5.163934426229509e-05, "loss": 6.2762, "step": 316 }, { "epoch": 0.026072252113758645, "grad_norm": 1.6730965375900269, "learning_rate": 5.19672131147541e-05, "loss": 6.262, "step": 318 }, { "epoch": 0.026236228542147067, "grad_norm": 1.7695355415344238, "learning_rate": 5.229508196721312e-05, "loss": 6.2443, "step": 320 }, { "epoch": 0.026400204970535485, "grad_norm": 1.5890907049179077, "learning_rate": 5.2622950819672134e-05, "loss": 6.2874, "step": 322 }, { "epoch": 0.026564181398923904, "grad_norm": 1.351945161819458, "learning_rate": 5.295081967213115e-05, "loss": 6.1962, "step": 324 }, { "epoch": 0.026728157827312325, "grad_norm": 1.8272804021835327, "learning_rate": 5.327868852459017e-05, "loss": 6.2117, "step": 326 }, { "epoch": 0.026892134255700743, "grad_norm": 1.517553448677063, "learning_rate": 5.360655737704918e-05, "loss": 6.226, "step": 328 }, { "epoch": 0.02705611068408916, "grad_norm": 1.2578155994415283, "learning_rate": 5.393442622950819e-05, "loss": 6.1564, "step": 330 }, { "epoch": 0.02722008711247758, "grad_norm": 1.3466731309890747, "learning_rate": 5.426229508196722e-05, "loss": 6.2005, "step": 332 }, { "epoch": 0.027384063540866, "grad_norm": 1.8164156675338745, "learning_rate": 5.459016393442623e-05, "loss": 6.2395, "step": 334 }, { "epoch": 0.02754803996925442, "grad_norm": 1.9037235975265503, "learning_rate": 5.491803278688525e-05, "loss": 6.1536, "step": 336 }, { "epoch": 0.027712016397642838, "grad_norm": 2.0508320331573486, "learning_rate": 5.5245901639344265e-05, "loss": 6.1402, "step": 338 }, { "epoch": 0.02787599282603126, "grad_norm": 2.135850667953491, "learning_rate": 5.557377049180328e-05, "loss": 6.1144, "step": 340 }, { "epoch": 0.028039969254419678, "grad_norm": 1.4268845319747925, "learning_rate": 5.5901639344262305e-05, "loss": 6.0949, "step": 342 }, { "epoch": 0.028203945682808096, "grad_norm": 1.7478644847869873, "learning_rate": 5.622950819672132e-05, "loss": 6.0932, "step": 344 }, { "epoch": 0.028367922111196514, "grad_norm": 1.3711293935775757, "learning_rate": 5.6557377049180324e-05, "loss": 6.0383, "step": 346 }, { "epoch": 0.028531898539584936, "grad_norm": 2.102510929107666, "learning_rate": 5.688524590163935e-05, "loss": 6.151, "step": 348 }, { "epoch": 0.028695874967973354, "grad_norm": 2.1685709953308105, "learning_rate": 5.7213114754098364e-05, "loss": 6.0951, "step": 350 }, { "epoch": 0.028859851396361772, "grad_norm": 1.92462158203125, "learning_rate": 5.754098360655738e-05, "loss": 6.1199, "step": 352 }, { "epoch": 0.029023827824750194, "grad_norm": 2.6841022968292236, "learning_rate": 5.7868852459016396e-05, "loss": 6.0132, "step": 354 }, { "epoch": 0.029187804253138612, "grad_norm": 2.1786410808563232, "learning_rate": 5.819672131147541e-05, "loss": 6.0524, "step": 356 }, { "epoch": 0.02935178068152703, "grad_norm": 2.2823619842529297, "learning_rate": 5.8524590163934436e-05, "loss": 6.0581, "step": 358 }, { "epoch": 0.02951575710991545, "grad_norm": 1.539980173110962, "learning_rate": 5.885245901639345e-05, "loss": 6.0375, "step": 360 }, { "epoch": 0.02967973353830387, "grad_norm": 2.02248215675354, "learning_rate": 5.9180327868852455e-05, "loss": 6.024, "step": 362 }, { "epoch": 0.029843709966692288, "grad_norm": 2.0817408561706543, "learning_rate": 5.950819672131148e-05, "loss": 5.9895, "step": 364 }, { "epoch": 0.030007686395080706, "grad_norm": 2.433933973312378, "learning_rate": 5.9836065573770495e-05, "loss": 5.9542, "step": 366 }, { "epoch": 0.030171662823469125, "grad_norm": 1.9378852844238281, "learning_rate": 6.016393442622951e-05, "loss": 6.0731, "step": 368 }, { "epoch": 0.030335639251857546, "grad_norm": 1.5799286365509033, "learning_rate": 6.049180327868853e-05, "loss": 5.941, "step": 370 }, { "epoch": 0.030499615680245964, "grad_norm": 1.8533333539962769, "learning_rate": 6.081967213114754e-05, "loss": 5.9119, "step": 372 }, { "epoch": 0.030663592108634383, "grad_norm": 1.8558950424194336, "learning_rate": 6.114754098360656e-05, "loss": 5.9879, "step": 374 }, { "epoch": 0.030827568537022804, "grad_norm": 1.987197756767273, "learning_rate": 6.147540983606557e-05, "loss": 5.9686, "step": 376 }, { "epoch": 0.030991544965411223, "grad_norm": 2.317286491394043, "learning_rate": 6.180327868852459e-05, "loss": 5.9557, "step": 378 }, { "epoch": 0.03115552139379964, "grad_norm": 2.142669916152954, "learning_rate": 6.213114754098361e-05, "loss": 5.8988, "step": 380 }, { "epoch": 0.03131949782218806, "grad_norm": 2.495762825012207, "learning_rate": 6.245901639344263e-05, "loss": 5.9131, "step": 382 }, { "epoch": 0.03148347425057648, "grad_norm": 1.6954542398452759, "learning_rate": 6.278688524590164e-05, "loss": 5.9113, "step": 384 }, { "epoch": 0.0316474506789649, "grad_norm": 1.8803491592407227, "learning_rate": 6.311475409836067e-05, "loss": 5.976, "step": 386 }, { "epoch": 0.03181142710735332, "grad_norm": 1.8088006973266602, "learning_rate": 6.344262295081968e-05, "loss": 5.9088, "step": 388 }, { "epoch": 0.031975403535741735, "grad_norm": 1.9314664602279663, "learning_rate": 6.377049180327869e-05, "loss": 5.8998, "step": 390 }, { "epoch": 0.03213937996413015, "grad_norm": 2.3078742027282715, "learning_rate": 6.40983606557377e-05, "loss": 5.9263, "step": 392 }, { "epoch": 0.03230335639251858, "grad_norm": 2.3376059532165527, "learning_rate": 6.442622950819672e-05, "loss": 5.8472, "step": 394 }, { "epoch": 0.032467332820907, "grad_norm": 2.106436252593994, "learning_rate": 6.475409836065574e-05, "loss": 5.8755, "step": 396 }, { "epoch": 0.032631309249295415, "grad_norm": 1.564982533454895, "learning_rate": 6.508196721311476e-05, "loss": 5.8482, "step": 398 }, { "epoch": 0.03279528567768383, "grad_norm": 1.3622092008590698, "learning_rate": 6.540983606557377e-05, "loss": 5.8217, "step": 400 }, { "epoch": 0.03295926210607225, "grad_norm": 2.422950029373169, "learning_rate": 6.57377049180328e-05, "loss": 5.8151, "step": 402 }, { "epoch": 0.03312323853446067, "grad_norm": 2.1505627632141113, "learning_rate": 6.606557377049181e-05, "loss": 5.8411, "step": 404 }, { "epoch": 0.03328721496284909, "grad_norm": 2.70080304145813, "learning_rate": 6.639344262295082e-05, "loss": 5.8123, "step": 406 }, { "epoch": 0.03345119139123751, "grad_norm": 2.5012848377227783, "learning_rate": 6.672131147540984e-05, "loss": 5.8724, "step": 408 }, { "epoch": 0.03361516781962593, "grad_norm": 1.424917459487915, "learning_rate": 6.704918032786885e-05, "loss": 5.7876, "step": 410 }, { "epoch": 0.03377914424801435, "grad_norm": 2.5553343296051025, "learning_rate": 6.737704918032786e-05, "loss": 5.7938, "step": 412 }, { "epoch": 0.03394312067640277, "grad_norm": 1.8178255558013916, "learning_rate": 6.770491803278689e-05, "loss": 5.8053, "step": 414 }, { "epoch": 0.034107097104791186, "grad_norm": 1.838123083114624, "learning_rate": 6.80327868852459e-05, "loss": 5.7617, "step": 416 }, { "epoch": 0.034271073533179604, "grad_norm": 2.1789233684539795, "learning_rate": 6.836065573770493e-05, "loss": 5.7346, "step": 418 }, { "epoch": 0.03443504996156802, "grad_norm": 1.6680387258529663, "learning_rate": 6.868852459016394e-05, "loss": 5.7538, "step": 420 }, { "epoch": 0.03459902638995645, "grad_norm": 1.8677077293395996, "learning_rate": 6.901639344262295e-05, "loss": 5.7391, "step": 422 }, { "epoch": 0.034763002818344865, "grad_norm": 1.6001735925674438, "learning_rate": 6.934426229508197e-05, "loss": 5.7602, "step": 424 }, { "epoch": 0.03492697924673328, "grad_norm": 1.8411906957626343, "learning_rate": 6.967213114754098e-05, "loss": 5.7353, "step": 426 }, { "epoch": 0.0350909556751217, "grad_norm": 1.8191148042678833, "learning_rate": 7e-05, "loss": 5.6915, "step": 428 }, { "epoch": 0.03525493210351012, "grad_norm": 1.5732213258743286, "learning_rate": 7.032786885245902e-05, "loss": 5.7435, "step": 430 }, { "epoch": 0.03541890853189854, "grad_norm": 1.490302562713623, "learning_rate": 7.065573770491803e-05, "loss": 5.7039, "step": 432 }, { "epoch": 0.035582884960286956, "grad_norm": 2.1343276500701904, "learning_rate": 7.098360655737706e-05, "loss": 5.6742, "step": 434 }, { "epoch": 0.03574686138867538, "grad_norm": 2.2556490898132324, "learning_rate": 7.131147540983607e-05, "loss": 5.68, "step": 436 }, { "epoch": 0.0359108378170638, "grad_norm": 1.9686386585235596, "learning_rate": 7.163934426229509e-05, "loss": 5.7184, "step": 438 }, { "epoch": 0.03607481424545222, "grad_norm": 1.8209033012390137, "learning_rate": 7.196721311475411e-05, "loss": 5.6744, "step": 440 }, { "epoch": 0.036238790673840636, "grad_norm": 1.4810411930084229, "learning_rate": 7.229508196721311e-05, "loss": 5.6897, "step": 442 }, { "epoch": 0.036402767102229054, "grad_norm": 1.438828706741333, "learning_rate": 7.262295081967213e-05, "loss": 5.7104, "step": 444 }, { "epoch": 0.03656674353061747, "grad_norm": 1.9264169931411743, "learning_rate": 7.295081967213115e-05, "loss": 5.6648, "step": 446 }, { "epoch": 0.03673071995900589, "grad_norm": 1.5463581085205078, "learning_rate": 7.327868852459016e-05, "loss": 5.6325, "step": 448 }, { "epoch": 0.03689469638739431, "grad_norm": 2.308321237564087, "learning_rate": 7.360655737704918e-05, "loss": 5.6288, "step": 450 }, { "epoch": 0.037058672815782734, "grad_norm": 1.6210638284683228, "learning_rate": 7.39344262295082e-05, "loss": 5.5833, "step": 452 }, { "epoch": 0.03722264924417115, "grad_norm": 1.9419602155685425, "learning_rate": 7.426229508196722e-05, "loss": 5.6552, "step": 454 }, { "epoch": 0.03738662567255957, "grad_norm": 1.5004584789276123, "learning_rate": 7.459016393442624e-05, "loss": 5.5908, "step": 456 }, { "epoch": 0.03755060210094799, "grad_norm": 1.7404072284698486, "learning_rate": 7.491803278688526e-05, "loss": 5.5853, "step": 458 }, { "epoch": 0.03771457852933641, "grad_norm": 2.1047239303588867, "learning_rate": 7.524590163934426e-05, "loss": 5.6346, "step": 460 }, { "epoch": 0.037878554957724825, "grad_norm": 2.2261962890625, "learning_rate": 7.557377049180328e-05, "loss": 5.6325, "step": 462 }, { "epoch": 0.03804253138611324, "grad_norm": 1.9876081943511963, "learning_rate": 7.59016393442623e-05, "loss": 5.5733, "step": 464 }, { "epoch": 0.03820650781450167, "grad_norm": 1.9988337755203247, "learning_rate": 7.622950819672131e-05, "loss": 5.5402, "step": 466 }, { "epoch": 0.038370484242890086, "grad_norm": 1.896393060684204, "learning_rate": 7.655737704918034e-05, "loss": 5.5694, "step": 468 }, { "epoch": 0.038534460671278505, "grad_norm": 1.8517329692840576, "learning_rate": 7.688524590163935e-05, "loss": 5.5517, "step": 470 }, { "epoch": 0.03869843709966692, "grad_norm": 2.0797197818756104, "learning_rate": 7.721311475409836e-05, "loss": 5.5111, "step": 472 }, { "epoch": 0.03886241352805534, "grad_norm": 1.4706847667694092, "learning_rate": 7.754098360655739e-05, "loss": 5.5535, "step": 474 }, { "epoch": 0.03902638995644376, "grad_norm": 1.4342091083526611, "learning_rate": 7.78688524590164e-05, "loss": 5.5338, "step": 476 }, { "epoch": 0.03919036638483218, "grad_norm": 1.520163893699646, "learning_rate": 7.819672131147541e-05, "loss": 5.4999, "step": 478 }, { "epoch": 0.0393543428132206, "grad_norm": 1.479134202003479, "learning_rate": 7.852459016393443e-05, "loss": 5.5316, "step": 480 }, { "epoch": 0.03951831924160902, "grad_norm": 1.9230724573135376, "learning_rate": 7.885245901639344e-05, "loss": 5.4864, "step": 482 }, { "epoch": 0.03968229566999744, "grad_norm": 1.561827301979065, "learning_rate": 7.918032786885247e-05, "loss": 5.5004, "step": 484 }, { "epoch": 0.03984627209838586, "grad_norm": 1.9881266355514526, "learning_rate": 7.950819672131148e-05, "loss": 5.5241, "step": 486 }, { "epoch": 0.040010248526774275, "grad_norm": 2.184860944747925, "learning_rate": 7.98360655737705e-05, "loss": 5.4425, "step": 488 }, { "epoch": 0.04017422495516269, "grad_norm": 1.7443156242370605, "learning_rate": 8.016393442622952e-05, "loss": 5.4779, "step": 490 }, { "epoch": 0.04033820138355111, "grad_norm": 1.4682708978652954, "learning_rate": 8.049180327868853e-05, "loss": 5.5347, "step": 492 }, { "epoch": 0.04050217781193954, "grad_norm": 1.3537533283233643, "learning_rate": 8.081967213114755e-05, "loss": 5.4484, "step": 494 }, { "epoch": 0.040666154240327955, "grad_norm": 2.4647626876831055, "learning_rate": 8.114754098360656e-05, "loss": 5.428, "step": 496 }, { "epoch": 0.04083013066871637, "grad_norm": 2.0095391273498535, "learning_rate": 8.147540983606557e-05, "loss": 5.4425, "step": 498 }, { "epoch": 0.04099410709710479, "grad_norm": 2.309438705444336, "learning_rate": 8.18032786885246e-05, "loss": 5.4524, "step": 500 }, { "epoch": 0.04115808352549321, "grad_norm": 1.7151856422424316, "learning_rate": 8.213114754098361e-05, "loss": 5.4337, "step": 502 }, { "epoch": 0.04132205995388163, "grad_norm": 1.7919552326202393, "learning_rate": 8.245901639344262e-05, "loss": 5.4316, "step": 504 }, { "epoch": 0.041486036382270046, "grad_norm": 2.1312031745910645, "learning_rate": 8.278688524590165e-05, "loss": 5.445, "step": 506 }, { "epoch": 0.04165001281065847, "grad_norm": 1.994307518005371, "learning_rate": 8.311475409836066e-05, "loss": 5.3947, "step": 508 }, { "epoch": 0.04181398923904689, "grad_norm": 1.912011981010437, "learning_rate": 8.344262295081968e-05, "loss": 5.358, "step": 510 }, { "epoch": 0.04197796566743531, "grad_norm": 2.522435188293457, "learning_rate": 8.377049180327869e-05, "loss": 5.4445, "step": 512 }, { "epoch": 0.042141942095823726, "grad_norm": 1.8543167114257812, "learning_rate": 8.40983606557377e-05, "loss": 5.4107, "step": 514 }, { "epoch": 0.042305918524212144, "grad_norm": 2.0634872913360596, "learning_rate": 8.442622950819673e-05, "loss": 5.3435, "step": 516 }, { "epoch": 0.04246989495260056, "grad_norm": 1.563451886177063, "learning_rate": 8.475409836065574e-05, "loss": 5.3955, "step": 518 }, { "epoch": 0.04263387138098898, "grad_norm": 1.305403709411621, "learning_rate": 8.508196721311476e-05, "loss": 5.3148, "step": 520 }, { "epoch": 0.042797847809377405, "grad_norm": 1.9041563272476196, "learning_rate": 8.540983606557378e-05, "loss": 5.3892, "step": 522 }, { "epoch": 0.042961824237765824, "grad_norm": 1.7804834842681885, "learning_rate": 8.57377049180328e-05, "loss": 5.3762, "step": 524 }, { "epoch": 0.04312580066615424, "grad_norm": 1.959104299545288, "learning_rate": 8.606557377049181e-05, "loss": 5.3638, "step": 526 }, { "epoch": 0.04328977709454266, "grad_norm": 2.4005024433135986, "learning_rate": 8.639344262295082e-05, "loss": 5.3913, "step": 528 }, { "epoch": 0.04345375352293108, "grad_norm": 1.8313933610916138, "learning_rate": 8.672131147540983e-05, "loss": 5.3175, "step": 530 }, { "epoch": 0.043617729951319496, "grad_norm": 1.708949327468872, "learning_rate": 8.704918032786885e-05, "loss": 5.3506, "step": 532 }, { "epoch": 0.043781706379707915, "grad_norm": 1.9135140180587769, "learning_rate": 8.737704918032787e-05, "loss": 5.3197, "step": 534 }, { "epoch": 0.04394568280809633, "grad_norm": 2.3220021724700928, "learning_rate": 8.770491803278689e-05, "loss": 5.3148, "step": 536 }, { "epoch": 0.04410965923648476, "grad_norm": 1.6934478282928467, "learning_rate": 8.803278688524591e-05, "loss": 5.3818, "step": 538 }, { "epoch": 0.044273635664873176, "grad_norm": 1.633090615272522, "learning_rate": 8.836065573770493e-05, "loss": 5.2976, "step": 540 }, { "epoch": 0.044437612093261594, "grad_norm": 2.194674253463745, "learning_rate": 8.868852459016394e-05, "loss": 5.2909, "step": 542 }, { "epoch": 0.04460158852165001, "grad_norm": 1.8494622707366943, "learning_rate": 8.901639344262295e-05, "loss": 5.3178, "step": 544 }, { "epoch": 0.04476556495003843, "grad_norm": 1.523157000541687, "learning_rate": 8.934426229508197e-05, "loss": 5.2934, "step": 546 }, { "epoch": 0.04492954137842685, "grad_norm": 1.7002984285354614, "learning_rate": 8.967213114754098e-05, "loss": 5.3107, "step": 548 }, { "epoch": 0.04509351780681527, "grad_norm": 1.9046440124511719, "learning_rate": 9e-05, "loss": 5.3225, "step": 550 }, { "epoch": 0.04525749423520369, "grad_norm": 1.8026628494262695, "learning_rate": 9.032786885245902e-05, "loss": 5.3129, "step": 552 }, { "epoch": 0.04542147066359211, "grad_norm": 1.6067262887954712, "learning_rate": 9.065573770491805e-05, "loss": 5.2214, "step": 554 }, { "epoch": 0.04558544709198053, "grad_norm": 1.669403314590454, "learning_rate": 9.098360655737706e-05, "loss": 5.274, "step": 556 }, { "epoch": 0.04574942352036895, "grad_norm": 1.7327196598052979, "learning_rate": 9.131147540983607e-05, "loss": 5.3007, "step": 558 }, { "epoch": 0.045913399948757365, "grad_norm": 1.4521604776382446, "learning_rate": 9.163934426229508e-05, "loss": 5.2284, "step": 560 }, { "epoch": 0.04607737637714578, "grad_norm": 1.5612112283706665, "learning_rate": 9.19672131147541e-05, "loss": 5.2094, "step": 562 }, { "epoch": 0.0462413528055342, "grad_norm": 2.2565908432006836, "learning_rate": 9.229508196721311e-05, "loss": 5.25, "step": 564 }, { "epoch": 0.046405329233922626, "grad_norm": 2.040969133377075, "learning_rate": 9.262295081967214e-05, "loss": 5.2399, "step": 566 }, { "epoch": 0.046569305662311045, "grad_norm": 1.8394721746444702, "learning_rate": 9.295081967213115e-05, "loss": 5.2433, "step": 568 }, { "epoch": 0.04673328209069946, "grad_norm": 2.2264137268066406, "learning_rate": 9.327868852459016e-05, "loss": 5.1826, "step": 570 }, { "epoch": 0.04689725851908788, "grad_norm": 1.537869930267334, "learning_rate": 9.360655737704919e-05, "loss": 5.2544, "step": 572 }, { "epoch": 0.0470612349474763, "grad_norm": 1.6794589757919312, "learning_rate": 9.39344262295082e-05, "loss": 5.2355, "step": 574 }, { "epoch": 0.04722521137586472, "grad_norm": 2.1024844646453857, "learning_rate": 9.426229508196722e-05, "loss": 5.2308, "step": 576 }, { "epoch": 0.047389187804253136, "grad_norm": 1.6713175773620605, "learning_rate": 9.459016393442623e-05, "loss": 5.1545, "step": 578 }, { "epoch": 0.04755316423264156, "grad_norm": 1.6628456115722656, "learning_rate": 9.491803278688524e-05, "loss": 5.1741, "step": 580 }, { "epoch": 0.04771714066102998, "grad_norm": 1.4492676258087158, "learning_rate": 9.524590163934427e-05, "loss": 5.1566, "step": 582 }, { "epoch": 0.0478811170894184, "grad_norm": 1.794235110282898, "learning_rate": 9.557377049180328e-05, "loss": 5.1699, "step": 584 }, { "epoch": 0.048045093517806815, "grad_norm": 1.934901475906372, "learning_rate": 9.59016393442623e-05, "loss": 5.1533, "step": 586 }, { "epoch": 0.048209069946195234, "grad_norm": 1.2630641460418701, "learning_rate": 9.622950819672132e-05, "loss": 5.1782, "step": 588 }, { "epoch": 0.04837304637458365, "grad_norm": 1.4576668739318848, "learning_rate": 9.655737704918033e-05, "loss": 5.1815, "step": 590 }, { "epoch": 0.04853702280297207, "grad_norm": 1.842677354812622, "learning_rate": 9.688524590163936e-05, "loss": 5.1813, "step": 592 }, { "epoch": 0.048700999231360495, "grad_norm": 1.393120288848877, "learning_rate": 9.721311475409836e-05, "loss": 5.2008, "step": 594 }, { "epoch": 0.04886497565974891, "grad_norm": 1.789939522743225, "learning_rate": 9.754098360655737e-05, "loss": 5.113, "step": 596 }, { "epoch": 0.04902895208813733, "grad_norm": 1.8867571353912354, "learning_rate": 9.78688524590164e-05, "loss": 5.1684, "step": 598 }, { "epoch": 0.04919292851652575, "grad_norm": 1.278130292892456, "learning_rate": 9.819672131147541e-05, "loss": 5.0933, "step": 600 }, { "epoch": 0.04935690494491417, "grad_norm": 1.636001467704773, "learning_rate": 9.852459016393443e-05, "loss": 5.1324, "step": 602 }, { "epoch": 0.049520881373302586, "grad_norm": 1.7511135339736938, "learning_rate": 9.885245901639345e-05, "loss": 5.1855, "step": 604 }, { "epoch": 0.049684857801691004, "grad_norm": 1.5389798879623413, "learning_rate": 9.918032786885247e-05, "loss": 5.0774, "step": 606 }, { "epoch": 0.04984883423007943, "grad_norm": 1.466962218284607, "learning_rate": 9.950819672131148e-05, "loss": 5.0469, "step": 608 }, { "epoch": 0.05001281065846785, "grad_norm": 1.6687493324279785, "learning_rate": 9.98360655737705e-05, "loss": 5.1153, "step": 610 }, { "epoch": 0.050176787086856266, "grad_norm": 2.197819232940674, "learning_rate": 9.999999816220216e-05, "loss": 5.1082, "step": 612 }, { "epoch": 0.050340763515244684, "grad_norm": 1.3717740774154663, "learning_rate": 9.999998345982023e-05, "loss": 5.0569, "step": 614 }, { "epoch": 0.0505047399436331, "grad_norm": 1.4402227401733398, "learning_rate": 9.999995405506069e-05, "loss": 5.047, "step": 616 }, { "epoch": 0.05066871637202152, "grad_norm": 1.6121858358383179, "learning_rate": 9.999990994793218e-05, "loss": 5.1063, "step": 618 }, { "epoch": 0.05083269280040994, "grad_norm": 1.4336620569229126, "learning_rate": 9.999985113844767e-05, "loss": 5.095, "step": 620 }, { "epoch": 0.050996669228798364, "grad_norm": 1.3124680519104004, "learning_rate": 9.999977762662447e-05, "loss": 5.0633, "step": 622 }, { "epoch": 0.05116064565718678, "grad_norm": 1.4217371940612793, "learning_rate": 9.999968941248419e-05, "loss": 5.0547, "step": 624 }, { "epoch": 0.0513246220855752, "grad_norm": 1.182154655456543, "learning_rate": 9.999958649605275e-05, "loss": 5.1194, "step": 626 }, { "epoch": 0.05148859851396362, "grad_norm": 1.8198816776275635, "learning_rate": 9.999946887736043e-05, "loss": 5.0367, "step": 628 }, { "epoch": 0.051652574942352036, "grad_norm": 1.8673418760299683, "learning_rate": 9.99993365564418e-05, "loss": 5.0187, "step": 630 }, { "epoch": 0.051816551370740455, "grad_norm": 1.7428374290466309, "learning_rate": 9.99991895333358e-05, "loss": 5.0675, "step": 632 }, { "epoch": 0.05198052779912887, "grad_norm": 1.8670332431793213, "learning_rate": 9.999902780808563e-05, "loss": 5.0166, "step": 634 }, { "epoch": 0.05214450422751729, "grad_norm": 1.2812050580978394, "learning_rate": 9.999885138073886e-05, "loss": 5.059, "step": 636 }, { "epoch": 0.052308480655905716, "grad_norm": 1.7798051834106445, "learning_rate": 9.999866025134737e-05, "loss": 5.0736, "step": 638 }, { "epoch": 0.052472457084294134, "grad_norm": 1.9266560077667236, "learning_rate": 9.999845441996734e-05, "loss": 4.9463, "step": 640 }, { "epoch": 0.05263643351268255, "grad_norm": 1.7603977918624878, "learning_rate": 9.999823388665932e-05, "loss": 4.9446, "step": 642 }, { "epoch": 0.05280040994107097, "grad_norm": 1.383430004119873, "learning_rate": 9.999799865148816e-05, "loss": 5.0142, "step": 644 }, { "epoch": 0.05296438636945939, "grad_norm": 1.325101613998413, "learning_rate": 9.9997748714523e-05, "loss": 4.9583, "step": 646 }, { "epoch": 0.05312836279784781, "grad_norm": 1.3705639839172363, "learning_rate": 9.999748407583736e-05, "loss": 4.9762, "step": 648 }, { "epoch": 0.053292339226236225, "grad_norm": 1.5312895774841309, "learning_rate": 9.999720473550905e-05, "loss": 4.9743, "step": 650 }, { "epoch": 0.05345631565462465, "grad_norm": 1.5449235439300537, "learning_rate": 9.999691069362019e-05, "loss": 5.0046, "step": 652 }, { "epoch": 0.05362029208301307, "grad_norm": 1.4353389739990234, "learning_rate": 9.999660195025727e-05, "loss": 4.9794, "step": 654 }, { "epoch": 0.05378426851140149, "grad_norm": 1.7781524658203125, "learning_rate": 9.999627850551108e-05, "loss": 5.0089, "step": 656 }, { "epoch": 0.053948244939789905, "grad_norm": 1.5605298280715942, "learning_rate": 9.999594035947668e-05, "loss": 4.9213, "step": 658 }, { "epoch": 0.05411222136817832, "grad_norm": 1.1541820764541626, "learning_rate": 9.999558751225355e-05, "loss": 4.9311, "step": 660 }, { "epoch": 0.05427619779656674, "grad_norm": 1.2745718955993652, "learning_rate": 9.999521996394544e-05, "loss": 4.9893, "step": 662 }, { "epoch": 0.05444017422495516, "grad_norm": 1.3617721796035767, "learning_rate": 9.999483771466041e-05, "loss": 4.8475, "step": 664 }, { "epoch": 0.054604150653343585, "grad_norm": 1.6262531280517578, "learning_rate": 9.999444076451086e-05, "loss": 4.944, "step": 666 }, { "epoch": 0.054768127081732, "grad_norm": 1.830307960510254, "learning_rate": 9.99940291136135e-05, "loss": 4.9142, "step": 668 }, { "epoch": 0.05493210351012042, "grad_norm": 1.6637526750564575, "learning_rate": 9.999360276208942e-05, "loss": 4.9335, "step": 670 }, { "epoch": 0.05509607993850884, "grad_norm": 1.5176993608474731, "learning_rate": 9.999316171006395e-05, "loss": 4.8994, "step": 672 }, { "epoch": 0.05526005636689726, "grad_norm": 1.4975998401641846, "learning_rate": 9.999270595766677e-05, "loss": 4.9188, "step": 674 }, { "epoch": 0.055424032795285676, "grad_norm": 1.371583342552185, "learning_rate": 9.999223550503191e-05, "loss": 4.9602, "step": 676 }, { "epoch": 0.055588009223674094, "grad_norm": 1.2749911546707153, "learning_rate": 9.999175035229774e-05, "loss": 4.8472, "step": 678 }, { "epoch": 0.05575198565206252, "grad_norm": 1.6738046407699585, "learning_rate": 9.999125049960687e-05, "loss": 4.8499, "step": 680 }, { "epoch": 0.05591596208045094, "grad_norm": 1.3173338174819946, "learning_rate": 9.999073594710629e-05, "loss": 4.8861, "step": 682 }, { "epoch": 0.056079938508839355, "grad_norm": 1.332027792930603, "learning_rate": 9.999020669494731e-05, "loss": 4.9713, "step": 684 }, { "epoch": 0.056243914937227774, "grad_norm": 1.3671154975891113, "learning_rate": 9.998966274328557e-05, "loss": 4.888, "step": 686 }, { "epoch": 0.05640789136561619, "grad_norm": 1.5266999006271362, "learning_rate": 9.998910409228097e-05, "loss": 4.891, "step": 688 }, { "epoch": 0.05657186779400461, "grad_norm": 1.5487200021743774, "learning_rate": 9.998853074209785e-05, "loss": 4.8246, "step": 690 }, { "epoch": 0.05673584422239303, "grad_norm": 1.2039135694503784, "learning_rate": 9.998794269290474e-05, "loss": 4.837, "step": 692 }, { "epoch": 0.05689982065078145, "grad_norm": 1.6249526739120483, "learning_rate": 9.998733994487458e-05, "loss": 4.8498, "step": 694 }, { "epoch": 0.05706379707916987, "grad_norm": 1.849284291267395, "learning_rate": 9.998672249818461e-05, "loss": 4.8735, "step": 696 }, { "epoch": 0.05722777350755829, "grad_norm": 1.22111976146698, "learning_rate": 9.998609035301638e-05, "loss": 4.8487, "step": 698 }, { "epoch": 0.05739174993594671, "grad_norm": 1.4855297803878784, "learning_rate": 9.998544350955578e-05, "loss": 4.8222, "step": 700 }, { "epoch": 0.057555726364335126, "grad_norm": 1.0973803997039795, "learning_rate": 9.998478196799301e-05, "loss": 4.8494, "step": 702 }, { "epoch": 0.057719702792723544, "grad_norm": 1.3077099323272705, "learning_rate": 9.998410572852259e-05, "loss": 4.9111, "step": 704 }, { "epoch": 0.05788367922111196, "grad_norm": 1.3853782415390015, "learning_rate": 9.998341479134337e-05, "loss": 4.8096, "step": 706 }, { "epoch": 0.05804765564950039, "grad_norm": 1.0545806884765625, "learning_rate": 9.998270915665852e-05, "loss": 4.8357, "step": 708 }, { "epoch": 0.058211632077888806, "grad_norm": 1.1127121448516846, "learning_rate": 9.998198882467552e-05, "loss": 4.7969, "step": 710 }, { "epoch": 0.058375608506277224, "grad_norm": 1.3986823558807373, "learning_rate": 9.998125379560618e-05, "loss": 4.851, "step": 712 }, { "epoch": 0.05853958493466564, "grad_norm": 1.2785799503326416, "learning_rate": 9.998050406966668e-05, "loss": 4.7953, "step": 714 }, { "epoch": 0.05870356136305406, "grad_norm": 1.2151364088058472, "learning_rate": 9.99797396470774e-05, "loss": 4.7614, "step": 716 }, { "epoch": 0.05886753779144248, "grad_norm": 1.221731424331665, "learning_rate": 9.997896052806319e-05, "loss": 4.7832, "step": 718 }, { "epoch": 0.0590315142198309, "grad_norm": 1.2028709650039673, "learning_rate": 9.99781667128531e-05, "loss": 4.7496, "step": 720 }, { "epoch": 0.05919549064821932, "grad_norm": 1.2175623178482056, "learning_rate": 9.997735820168055e-05, "loss": 4.764, "step": 722 }, { "epoch": 0.05935946707660774, "grad_norm": 1.2626240253448486, "learning_rate": 9.99765349947833e-05, "loss": 4.7384, "step": 724 }, { "epoch": 0.05952344350499616, "grad_norm": 1.1986509561538696, "learning_rate": 9.997569709240339e-05, "loss": 4.771, "step": 726 }, { "epoch": 0.059687419933384576, "grad_norm": 1.3522735834121704, "learning_rate": 9.997484449478724e-05, "loss": 4.8, "step": 728 }, { "epoch": 0.059851396361772995, "grad_norm": 1.4882395267486572, "learning_rate": 9.997397720218553e-05, "loss": 4.7719, "step": 730 }, { "epoch": 0.06001537279016141, "grad_norm": 1.54912531375885, "learning_rate": 9.99730952148533e-05, "loss": 4.796, "step": 732 }, { "epoch": 0.06017934921854983, "grad_norm": 1.442063570022583, "learning_rate": 9.997219853304986e-05, "loss": 4.7315, "step": 734 }, { "epoch": 0.06034332564693825, "grad_norm": 1.193917155265808, "learning_rate": 9.997128715703892e-05, "loss": 4.7235, "step": 736 }, { "epoch": 0.060507302075326674, "grad_norm": 1.3164023160934448, "learning_rate": 9.997036108708843e-05, "loss": 4.7216, "step": 738 }, { "epoch": 0.06067127850371509, "grad_norm": 1.4200711250305176, "learning_rate": 9.996942032347074e-05, "loss": 4.7607, "step": 740 }, { "epoch": 0.06083525493210351, "grad_norm": 1.6272212266921997, "learning_rate": 9.996846486646245e-05, "loss": 4.7334, "step": 742 }, { "epoch": 0.06099923136049193, "grad_norm": 1.371127963066101, "learning_rate": 9.996749471634452e-05, "loss": 4.7856, "step": 744 }, { "epoch": 0.06116320778888035, "grad_norm": 1.3041075468063354, "learning_rate": 9.996650987340222e-05, "loss": 4.7614, "step": 746 }, { "epoch": 0.061327184217268765, "grad_norm": 1.3496429920196533, "learning_rate": 9.996551033792514e-05, "loss": 4.7778, "step": 748 }, { "epoch": 0.061491160645657184, "grad_norm": 1.433722734451294, "learning_rate": 9.996449611020719e-05, "loss": 4.7239, "step": 750 }, { "epoch": 0.06165513707404561, "grad_norm": 1.6855007410049438, "learning_rate": 9.996346719054659e-05, "loss": 4.8127, "step": 752 }, { "epoch": 0.06181911350243403, "grad_norm": 1.6095404624938965, "learning_rate": 9.996242357924591e-05, "loss": 4.718, "step": 754 }, { "epoch": 0.061983089930822445, "grad_norm": 1.3969067335128784, "learning_rate": 9.996136527661202e-05, "loss": 4.6984, "step": 756 }, { "epoch": 0.06214706635921086, "grad_norm": 1.154539704322815, "learning_rate": 9.99602922829561e-05, "loss": 4.7393, "step": 758 }, { "epoch": 0.06231104278759928, "grad_norm": 1.1856812238693237, "learning_rate": 9.995920459859367e-05, "loss": 4.6979, "step": 760 }, { "epoch": 0.0624750192159877, "grad_norm": 1.17214834690094, "learning_rate": 9.995810222384454e-05, "loss": 4.6376, "step": 762 }, { "epoch": 0.06263899564437612, "grad_norm": 1.2906792163848877, "learning_rate": 9.995698515903289e-05, "loss": 4.7684, "step": 764 }, { "epoch": 0.06280297207276454, "grad_norm": 1.3379504680633545, "learning_rate": 9.995585340448719e-05, "loss": 4.6261, "step": 766 }, { "epoch": 0.06296694850115296, "grad_norm": 1.4633430242538452, "learning_rate": 9.995470696054021e-05, "loss": 4.6889, "step": 768 }, { "epoch": 0.06313092492954138, "grad_norm": 1.4456816911697388, "learning_rate": 9.995354582752907e-05, "loss": 4.644, "step": 770 }, { "epoch": 0.0632949013579298, "grad_norm": 1.2467187643051147, "learning_rate": 9.995237000579519e-05, "loss": 4.613, "step": 772 }, { "epoch": 0.06345887778631822, "grad_norm": 1.2840498685836792, "learning_rate": 9.995117949568433e-05, "loss": 4.6492, "step": 774 }, { "epoch": 0.06362285421470663, "grad_norm": 0.9967436194419861, "learning_rate": 9.994997429754656e-05, "loss": 4.6299, "step": 776 }, { "epoch": 0.06378683064309505, "grad_norm": 1.2300001382827759, "learning_rate": 9.994875441173623e-05, "loss": 4.6396, "step": 778 }, { "epoch": 0.06395080707148347, "grad_norm": 1.225391149520874, "learning_rate": 9.99475198386121e-05, "loss": 4.6316, "step": 780 }, { "epoch": 0.06411478349987189, "grad_norm": 1.148902177810669, "learning_rate": 9.994627057853714e-05, "loss": 4.6019, "step": 782 }, { "epoch": 0.0642787599282603, "grad_norm": 1.3424532413482666, "learning_rate": 9.994500663187874e-05, "loss": 4.6052, "step": 784 }, { "epoch": 0.06444273635664872, "grad_norm": 1.1050846576690674, "learning_rate": 9.99437279990085e-05, "loss": 4.5606, "step": 786 }, { "epoch": 0.06460671278503716, "grad_norm": 1.2558073997497559, "learning_rate": 9.994243468030247e-05, "loss": 4.5245, "step": 788 }, { "epoch": 0.06477068921342558, "grad_norm": 1.366250991821289, "learning_rate": 9.99411266761409e-05, "loss": 4.6662, "step": 790 }, { "epoch": 0.064934665641814, "grad_norm": 1.0933619737625122, "learning_rate": 9.993980398690843e-05, "loss": 4.5972, "step": 792 }, { "epoch": 0.06509864207020241, "grad_norm": 1.0256333351135254, "learning_rate": 9.993846661299396e-05, "loss": 4.5935, "step": 794 }, { "epoch": 0.06526261849859083, "grad_norm": 0.9090489149093628, "learning_rate": 9.993711455479077e-05, "loss": 4.5371, "step": 796 }, { "epoch": 0.06542659492697925, "grad_norm": 1.3676148653030396, "learning_rate": 9.993574781269644e-05, "loss": 4.5959, "step": 798 }, { "epoch": 0.06559057135536767, "grad_norm": 1.1888647079467773, "learning_rate": 9.993436638711284e-05, "loss": 4.6145, "step": 800 }, { "epoch": 0.06575454778375608, "grad_norm": 0.883764386177063, "learning_rate": 9.993297027844616e-05, "loss": 4.5562, "step": 802 }, { "epoch": 0.0659185242121445, "grad_norm": 0.969134509563446, "learning_rate": 9.993155948710694e-05, "loss": 4.6248, "step": 804 }, { "epoch": 0.06608250064053292, "grad_norm": 0.9472710490226746, "learning_rate": 9.993013401351002e-05, "loss": 4.5769, "step": 806 }, { "epoch": 0.06624647706892134, "grad_norm": 1.162370204925537, "learning_rate": 9.992869385807455e-05, "loss": 4.5947, "step": 808 }, { "epoch": 0.06641045349730976, "grad_norm": 1.0858770608901978, "learning_rate": 9.992723902122403e-05, "loss": 4.5491, "step": 810 }, { "epoch": 0.06657442992569818, "grad_norm": 1.0923309326171875, "learning_rate": 9.992576950338621e-05, "loss": 4.5982, "step": 812 }, { "epoch": 0.0667384063540866, "grad_norm": 1.1544495820999146, "learning_rate": 9.992428530499323e-05, "loss": 4.5675, "step": 814 }, { "epoch": 0.06690238278247503, "grad_norm": 1.1099858283996582, "learning_rate": 9.99227864264815e-05, "loss": 4.5893, "step": 816 }, { "epoch": 0.06706635921086344, "grad_norm": 1.3164221048355103, "learning_rate": 9.992127286829176e-05, "loss": 4.5993, "step": 818 }, { "epoch": 0.06723033563925186, "grad_norm": 1.2815008163452148, "learning_rate": 9.991974463086908e-05, "loss": 4.5687, "step": 820 }, { "epoch": 0.06739431206764028, "grad_norm": 1.104801058769226, "learning_rate": 9.991820171466284e-05, "loss": 4.5231, "step": 822 }, { "epoch": 0.0675582884960287, "grad_norm": 1.2623943090438843, "learning_rate": 9.99166441201267e-05, "loss": 4.5498, "step": 824 }, { "epoch": 0.06772226492441712, "grad_norm": 1.3679825067520142, "learning_rate": 9.991507184771869e-05, "loss": 4.5317, "step": 826 }, { "epoch": 0.06788624135280553, "grad_norm": 1.1458314657211304, "learning_rate": 9.991348489790113e-05, "loss": 4.4599, "step": 828 }, { "epoch": 0.06805021778119395, "grad_norm": 1.1556310653686523, "learning_rate": 9.991188327114068e-05, "loss": 4.5466, "step": 830 }, { "epoch": 0.06821419420958237, "grad_norm": 1.2080873250961304, "learning_rate": 9.991026696790825e-05, "loss": 4.5734, "step": 832 }, { "epoch": 0.06837817063797079, "grad_norm": 1.3832921981811523, "learning_rate": 9.990863598867914e-05, "loss": 4.5367, "step": 834 }, { "epoch": 0.06854214706635921, "grad_norm": 0.9393659234046936, "learning_rate": 9.990699033393293e-05, "loss": 4.5072, "step": 836 }, { "epoch": 0.06870612349474763, "grad_norm": 0.9582691788673401, "learning_rate": 9.990533000415352e-05, "loss": 4.5046, "step": 838 }, { "epoch": 0.06887009992313604, "grad_norm": 1.2170627117156982, "learning_rate": 9.990365499982912e-05, "loss": 4.4628, "step": 840 }, { "epoch": 0.06903407635152446, "grad_norm": 1.037985920906067, "learning_rate": 9.990196532145227e-05, "loss": 4.5521, "step": 842 }, { "epoch": 0.0691980527799129, "grad_norm": 0.9628452658653259, "learning_rate": 9.990026096951981e-05, "loss": 4.5028, "step": 844 }, { "epoch": 0.06936202920830131, "grad_norm": 1.0910757780075073, "learning_rate": 9.98985419445329e-05, "loss": 4.5354, "step": 846 }, { "epoch": 0.06952600563668973, "grad_norm": 1.5108650922775269, "learning_rate": 9.989680824699703e-05, "loss": 4.5267, "step": 848 }, { "epoch": 0.06968998206507815, "grad_norm": 1.214145541191101, "learning_rate": 9.989505987742198e-05, "loss": 4.5271, "step": 850 }, { "epoch": 0.06985395849346657, "grad_norm": 1.2133456468582153, "learning_rate": 9.989329683632185e-05, "loss": 4.5195, "step": 852 }, { "epoch": 0.07001793492185499, "grad_norm": 1.4688955545425415, "learning_rate": 9.989151912421503e-05, "loss": 4.449, "step": 854 }, { "epoch": 0.0701819113502434, "grad_norm": 0.9931148290634155, "learning_rate": 9.988972674162432e-05, "loss": 4.4952, "step": 856 }, { "epoch": 0.07034588777863182, "grad_norm": 1.1149705648422241, "learning_rate": 9.988791968907671e-05, "loss": 4.4773, "step": 858 }, { "epoch": 0.07050986420702024, "grad_norm": 1.125609278678894, "learning_rate": 9.98860979671036e-05, "loss": 4.4371, "step": 860 }, { "epoch": 0.07067384063540866, "grad_norm": 0.985559344291687, "learning_rate": 9.988426157624063e-05, "loss": 4.4348, "step": 862 }, { "epoch": 0.07083781706379708, "grad_norm": 0.937849223613739, "learning_rate": 9.988241051702778e-05, "loss": 4.4481, "step": 864 }, { "epoch": 0.0710017934921855, "grad_norm": 1.4474624395370483, "learning_rate": 9.98805447900094e-05, "loss": 4.5007, "step": 866 }, { "epoch": 0.07116576992057391, "grad_norm": 0.9758108854293823, "learning_rate": 9.987866439573403e-05, "loss": 4.448, "step": 868 }, { "epoch": 0.07132974634896233, "grad_norm": 1.147123098373413, "learning_rate": 9.987676933475467e-05, "loss": 4.3825, "step": 870 }, { "epoch": 0.07149372277735076, "grad_norm": 1.2334039211273193, "learning_rate": 9.98748596076285e-05, "loss": 4.4577, "step": 872 }, { "epoch": 0.07165769920573918, "grad_norm": 1.1643340587615967, "learning_rate": 9.987293521491711e-05, "loss": 4.4511, "step": 874 }, { "epoch": 0.0718216756341276, "grad_norm": 0.9636064767837524, "learning_rate": 9.987099615718634e-05, "loss": 4.4068, "step": 876 }, { "epoch": 0.07198565206251602, "grad_norm": 1.0015913248062134, "learning_rate": 9.986904243500637e-05, "loss": 4.4082, "step": 878 }, { "epoch": 0.07214962849090444, "grad_norm": 1.003480076789856, "learning_rate": 9.98670740489517e-05, "loss": 4.368, "step": 880 }, { "epoch": 0.07231360491929285, "grad_norm": 1.0843322277069092, "learning_rate": 9.98650909996011e-05, "loss": 4.4389, "step": 882 }, { "epoch": 0.07247758134768127, "grad_norm": 1.0302671194076538, "learning_rate": 9.986309328753772e-05, "loss": 4.5192, "step": 884 }, { "epoch": 0.07264155777606969, "grad_norm": 1.0695348978042603, "learning_rate": 9.986108091334896e-05, "loss": 4.4382, "step": 886 }, { "epoch": 0.07280553420445811, "grad_norm": 0.8926975131034851, "learning_rate": 9.985905387762656e-05, "loss": 4.3326, "step": 888 }, { "epoch": 0.07296951063284653, "grad_norm": 0.8886080384254456, "learning_rate": 9.985701218096655e-05, "loss": 4.3415, "step": 890 }, { "epoch": 0.07313348706123494, "grad_norm": 1.0607457160949707, "learning_rate": 9.985495582396931e-05, "loss": 4.3892, "step": 892 }, { "epoch": 0.07329746348962336, "grad_norm": 0.9411015510559082, "learning_rate": 9.985288480723949e-05, "loss": 4.4194, "step": 894 }, { "epoch": 0.07346143991801178, "grad_norm": 1.1800034046173096, "learning_rate": 9.985079913138607e-05, "loss": 4.2992, "step": 896 }, { "epoch": 0.0736254163464002, "grad_norm": 0.9723591804504395, "learning_rate": 9.984869879702235e-05, "loss": 4.3078, "step": 898 }, { "epoch": 0.07378939277478862, "grad_norm": 1.0988435745239258, "learning_rate": 9.98465838047659e-05, "loss": 4.3855, "step": 900 }, { "epoch": 0.07395336920317705, "grad_norm": 1.3330668210983276, "learning_rate": 9.984445415523866e-05, "loss": 4.326, "step": 902 }, { "epoch": 0.07411734563156547, "grad_norm": 1.271883249282837, "learning_rate": 9.984230984906684e-05, "loss": 4.414, "step": 904 }, { "epoch": 0.07428132205995389, "grad_norm": 1.2705031633377075, "learning_rate": 9.984015088688094e-05, "loss": 4.3481, "step": 906 }, { "epoch": 0.0744452984883423, "grad_norm": 1.1692169904708862, "learning_rate": 9.983797726931585e-05, "loss": 4.3656, "step": 908 }, { "epoch": 0.07460927491673072, "grad_norm": 1.557630181312561, "learning_rate": 9.983578899701068e-05, "loss": 4.3926, "step": 910 }, { "epoch": 0.07477325134511914, "grad_norm": 1.252167820930481, "learning_rate": 9.98335860706089e-05, "loss": 4.3261, "step": 912 }, { "epoch": 0.07493722777350756, "grad_norm": 1.0178437232971191, "learning_rate": 9.983136849075827e-05, "loss": 4.3705, "step": 914 }, { "epoch": 0.07510120420189598, "grad_norm": 1.0884320735931396, "learning_rate": 9.982913625811086e-05, "loss": 4.3286, "step": 916 }, { "epoch": 0.0752651806302844, "grad_norm": 1.018403172492981, "learning_rate": 9.982688937332305e-05, "loss": 4.3491, "step": 918 }, { "epoch": 0.07542915705867281, "grad_norm": 1.3070770502090454, "learning_rate": 9.982462783705555e-05, "loss": 4.3412, "step": 920 }, { "epoch": 0.07559313348706123, "grad_norm": 0.8433274030685425, "learning_rate": 9.982235164997336e-05, "loss": 4.3234, "step": 922 }, { "epoch": 0.07575710991544965, "grad_norm": 1.3230133056640625, "learning_rate": 9.982006081274575e-05, "loss": 4.275, "step": 924 }, { "epoch": 0.07592108634383807, "grad_norm": 1.1820060014724731, "learning_rate": 9.981775532604637e-05, "loss": 4.2512, "step": 926 }, { "epoch": 0.07608506277222649, "grad_norm": 1.17715322971344, "learning_rate": 9.981543519055314e-05, "loss": 4.2675, "step": 928 }, { "epoch": 0.07624903920061492, "grad_norm": 0.9106295704841614, "learning_rate": 9.981310040694829e-05, "loss": 4.2771, "step": 930 }, { "epoch": 0.07641301562900334, "grad_norm": 1.2079112529754639, "learning_rate": 9.981075097591834e-05, "loss": 4.3381, "step": 932 }, { "epoch": 0.07657699205739175, "grad_norm": 1.199350118637085, "learning_rate": 9.980838689815414e-05, "loss": 4.3208, "step": 934 }, { "epoch": 0.07674096848578017, "grad_norm": 1.088350534439087, "learning_rate": 9.980600817435086e-05, "loss": 4.2439, "step": 936 }, { "epoch": 0.07690494491416859, "grad_norm": 1.0347201824188232, "learning_rate": 9.980361480520794e-05, "loss": 4.3169, "step": 938 }, { "epoch": 0.07706892134255701, "grad_norm": 1.3007529973983765, "learning_rate": 9.980120679142917e-05, "loss": 4.2441, "step": 940 }, { "epoch": 0.07723289777094543, "grad_norm": 0.9512838125228882, "learning_rate": 9.979878413372259e-05, "loss": 4.2474, "step": 942 }, { "epoch": 0.07739687419933385, "grad_norm": 1.170279622077942, "learning_rate": 9.979634683280059e-05, "loss": 4.2459, "step": 944 }, { "epoch": 0.07756085062772226, "grad_norm": 0.9993996620178223, "learning_rate": 9.979389488937984e-05, "loss": 4.2862, "step": 946 }, { "epoch": 0.07772482705611068, "grad_norm": 0.8914362788200378, "learning_rate": 9.979142830418134e-05, "loss": 4.2872, "step": 948 }, { "epoch": 0.0778888034844991, "grad_norm": 1.153712511062622, "learning_rate": 9.978894707793039e-05, "loss": 4.2023, "step": 950 }, { "epoch": 0.07805277991288752, "grad_norm": 1.2968518733978271, "learning_rate": 9.978645121135659e-05, "loss": 4.1831, "step": 952 }, { "epoch": 0.07821675634127594, "grad_norm": 1.2519747018814087, "learning_rate": 9.978394070519383e-05, "loss": 4.2492, "step": 954 }, { "epoch": 0.07838073276966435, "grad_norm": 1.1906182765960693, "learning_rate": 9.978141556018031e-05, "loss": 4.2596, "step": 956 }, { "epoch": 0.07854470919805279, "grad_norm": 1.073822021484375, "learning_rate": 9.977887577705857e-05, "loss": 4.2341, "step": 958 }, { "epoch": 0.0787086856264412, "grad_norm": 1.1360152959823608, "learning_rate": 9.977632135657543e-05, "loss": 4.2389, "step": 960 }, { "epoch": 0.07887266205482962, "grad_norm": 1.0772216320037842, "learning_rate": 9.977375229948195e-05, "loss": 4.1266, "step": 962 }, { "epoch": 0.07903663848321804, "grad_norm": 1.0052435398101807, "learning_rate": 9.977116860653363e-05, "loss": 4.2679, "step": 964 }, { "epoch": 0.07920061491160646, "grad_norm": 0.8987570405006409, "learning_rate": 9.976857027849019e-05, "loss": 4.2019, "step": 966 }, { "epoch": 0.07936459133999488, "grad_norm": 1.3094909191131592, "learning_rate": 9.97659573161156e-05, "loss": 4.152, "step": 968 }, { "epoch": 0.0795285677683833, "grad_norm": 1.0730571746826172, "learning_rate": 9.976332972017826e-05, "loss": 4.1829, "step": 970 }, { "epoch": 0.07969254419677171, "grad_norm": 1.0100387334823608, "learning_rate": 9.976068749145078e-05, "loss": 4.1619, "step": 972 }, { "epoch": 0.07985652062516013, "grad_norm": 0.9166683554649353, "learning_rate": 9.97580306307101e-05, "loss": 4.2055, "step": 974 }, { "epoch": 0.08002049705354855, "grad_norm": 0.9950259327888489, "learning_rate": 9.975535913873748e-05, "loss": 4.2696, "step": 976 }, { "epoch": 0.08018447348193697, "grad_norm": 1.174676775932312, "learning_rate": 9.975267301631846e-05, "loss": 4.257, "step": 978 }, { "epoch": 0.08034844991032539, "grad_norm": 0.9548665881156921, "learning_rate": 9.974997226424288e-05, "loss": 4.1695, "step": 980 }, { "epoch": 0.0805124263387138, "grad_norm": 1.1128441095352173, "learning_rate": 9.974725688330489e-05, "loss": 4.158, "step": 982 }, { "epoch": 0.08067640276710222, "grad_norm": 1.0748997926712036, "learning_rate": 9.974452687430293e-05, "loss": 4.1652, "step": 984 }, { "epoch": 0.08084037919549064, "grad_norm": 1.1463944911956787, "learning_rate": 9.974178223803981e-05, "loss": 4.1962, "step": 986 }, { "epoch": 0.08100435562387907, "grad_norm": 1.18082857131958, "learning_rate": 9.97390229753225e-05, "loss": 4.2016, "step": 988 }, { "epoch": 0.08116833205226749, "grad_norm": 1.0245002508163452, "learning_rate": 9.973624908696242e-05, "loss": 4.1244, "step": 990 }, { "epoch": 0.08133230848065591, "grad_norm": 1.0269415378570557, "learning_rate": 9.973346057377519e-05, "loss": 4.1954, "step": 992 }, { "epoch": 0.08149628490904433, "grad_norm": 1.3380223512649536, "learning_rate": 9.973065743658078e-05, "loss": 4.1392, "step": 994 }, { "epoch": 0.08166026133743275, "grad_norm": 1.0681672096252441, "learning_rate": 9.972783967620345e-05, "loss": 4.0451, "step": 996 }, { "epoch": 0.08182423776582116, "grad_norm": 1.1791712045669556, "learning_rate": 9.972500729347176e-05, "loss": 4.1513, "step": 998 }, { "epoch": 0.08198821419420958, "grad_norm": 0.9805436134338379, "learning_rate": 9.972216028921854e-05, "loss": 4.0942, "step": 1000 }, { "epoch": 0.082152190622598, "grad_norm": 1.2421460151672363, "learning_rate": 9.971929866428095e-05, "loss": 4.1216, "step": 1002 }, { "epoch": 0.08231616705098642, "grad_norm": 1.085977554321289, "learning_rate": 9.971642241950048e-05, "loss": 4.0897, "step": 1004 }, { "epoch": 0.08248014347937484, "grad_norm": 0.987576425075531, "learning_rate": 9.971353155572284e-05, "loss": 4.1011, "step": 1006 }, { "epoch": 0.08264411990776326, "grad_norm": 1.1075130701065063, "learning_rate": 9.97106260737981e-05, "loss": 4.1546, "step": 1008 }, { "epoch": 0.08280809633615167, "grad_norm": 1.0441356897354126, "learning_rate": 9.97077059745806e-05, "loss": 4.1124, "step": 1010 }, { "epoch": 0.08297207276454009, "grad_norm": 1.0685722827911377, "learning_rate": 9.970477125892902e-05, "loss": 4.1241, "step": 1012 }, { "epoch": 0.08313604919292851, "grad_norm": 0.9862858057022095, "learning_rate": 9.970182192770627e-05, "loss": 4.1194, "step": 1014 }, { "epoch": 0.08330002562131694, "grad_norm": 1.0223233699798584, "learning_rate": 9.96988579817796e-05, "loss": 4.1471, "step": 1016 }, { "epoch": 0.08346400204970536, "grad_norm": 1.0092227458953857, "learning_rate": 9.969587942202057e-05, "loss": 4.141, "step": 1018 }, { "epoch": 0.08362797847809378, "grad_norm": 0.9383386969566345, "learning_rate": 9.9692886249305e-05, "loss": 4.0195, "step": 1020 }, { "epoch": 0.0837919549064822, "grad_norm": 0.7819382548332214, "learning_rate": 9.968987846451305e-05, "loss": 4.0688, "step": 1022 }, { "epoch": 0.08395593133487061, "grad_norm": 0.9499008655548096, "learning_rate": 9.968685606852913e-05, "loss": 4.1559, "step": 1024 }, { "epoch": 0.08411990776325903, "grad_norm": 0.9793714880943298, "learning_rate": 9.968381906224195e-05, "loss": 4.1147, "step": 1026 }, { "epoch": 0.08428388419164745, "grad_norm": 0.8692449927330017, "learning_rate": 9.968076744654458e-05, "loss": 4.0808, "step": 1028 }, { "epoch": 0.08444786062003587, "grad_norm": 1.0884157419204712, "learning_rate": 9.967770122233431e-05, "loss": 4.0656, "step": 1030 }, { "epoch": 0.08461183704842429, "grad_norm": 0.8661439418792725, "learning_rate": 9.967462039051275e-05, "loss": 4.0854, "step": 1032 }, { "epoch": 0.0847758134768127, "grad_norm": 0.8530150651931763, "learning_rate": 9.967152495198584e-05, "loss": 4.0791, "step": 1034 }, { "epoch": 0.08493978990520112, "grad_norm": 1.156949520111084, "learning_rate": 9.966841490766378e-05, "loss": 4.0719, "step": 1036 }, { "epoch": 0.08510376633358954, "grad_norm": 0.9264504313468933, "learning_rate": 9.966529025846105e-05, "loss": 4.0668, "step": 1038 }, { "epoch": 0.08526774276197796, "grad_norm": 1.0428452491760254, "learning_rate": 9.966215100529645e-05, "loss": 4.0053, "step": 1040 }, { "epoch": 0.08543171919036638, "grad_norm": 0.9271348118782043, "learning_rate": 9.96589971490931e-05, "loss": 4.105, "step": 1042 }, { "epoch": 0.08559569561875481, "grad_norm": 1.1432150602340698, "learning_rate": 9.965582869077836e-05, "loss": 4.0669, "step": 1044 }, { "epoch": 0.08575967204714323, "grad_norm": 0.9777700901031494, "learning_rate": 9.965264563128391e-05, "loss": 4.01, "step": 1046 }, { "epoch": 0.08592364847553165, "grad_norm": 0.8779552578926086, "learning_rate": 9.96494479715457e-05, "loss": 3.9549, "step": 1048 }, { "epoch": 0.08608762490392007, "grad_norm": 0.915309488773346, "learning_rate": 9.964623571250404e-05, "loss": 4.0066, "step": 1050 }, { "epoch": 0.08625160133230848, "grad_norm": 0.753326416015625, "learning_rate": 9.964300885510345e-05, "loss": 4.0328, "step": 1052 }, { "epoch": 0.0864155777606969, "grad_norm": 0.8076086044311523, "learning_rate": 9.96397674002928e-05, "loss": 4.0298, "step": 1054 }, { "epoch": 0.08657955418908532, "grad_norm": 1.0535773038864136, "learning_rate": 9.963651134902524e-05, "loss": 4.0164, "step": 1056 }, { "epoch": 0.08674353061747374, "grad_norm": 0.8676068186759949, "learning_rate": 9.963324070225817e-05, "loss": 4.0412, "step": 1058 }, { "epoch": 0.08690750704586216, "grad_norm": 0.9461095333099365, "learning_rate": 9.962995546095333e-05, "loss": 4.0265, "step": 1060 }, { "epoch": 0.08707148347425057, "grad_norm": 0.9060032367706299, "learning_rate": 9.962665562607676e-05, "loss": 4.0104, "step": 1062 }, { "epoch": 0.08723545990263899, "grad_norm": 0.8914903998374939, "learning_rate": 9.962334119859873e-05, "loss": 3.9873, "step": 1064 }, { "epoch": 0.08739943633102741, "grad_norm": 1.057827353477478, "learning_rate": 9.962001217949389e-05, "loss": 4.0135, "step": 1066 }, { "epoch": 0.08756341275941583, "grad_norm": 0.9309613108634949, "learning_rate": 9.961666856974108e-05, "loss": 4.0184, "step": 1068 }, { "epoch": 0.08772738918780425, "grad_norm": 1.0464098453521729, "learning_rate": 9.961331037032351e-05, "loss": 4.0043, "step": 1070 }, { "epoch": 0.08789136561619267, "grad_norm": 0.7158762812614441, "learning_rate": 9.960993758222863e-05, "loss": 4.0443, "step": 1072 }, { "epoch": 0.0880553420445811, "grad_norm": 0.7665286660194397, "learning_rate": 9.960655020644823e-05, "loss": 3.9872, "step": 1074 }, { "epoch": 0.08821931847296952, "grad_norm": 0.8457959890365601, "learning_rate": 9.960314824397833e-05, "loss": 3.9417, "step": 1076 }, { "epoch": 0.08838329490135793, "grad_norm": 1.152944564819336, "learning_rate": 9.959973169581928e-05, "loss": 4.0407, "step": 1078 }, { "epoch": 0.08854727132974635, "grad_norm": 0.9561640620231628, "learning_rate": 9.959630056297573e-05, "loss": 4.0229, "step": 1080 }, { "epoch": 0.08871124775813477, "grad_norm": 0.9881964921951294, "learning_rate": 9.959285484645658e-05, "loss": 3.9769, "step": 1082 }, { "epoch": 0.08887522418652319, "grad_norm": 0.891594409942627, "learning_rate": 9.9589394547275e-05, "loss": 3.9446, "step": 1084 }, { "epoch": 0.0890392006149116, "grad_norm": 0.7694927453994751, "learning_rate": 9.958591966644853e-05, "loss": 3.968, "step": 1086 }, { "epoch": 0.08920317704330002, "grad_norm": 0.9506424069404602, "learning_rate": 9.958243020499893e-05, "loss": 3.976, "step": 1088 }, { "epoch": 0.08936715347168844, "grad_norm": 0.9964757561683655, "learning_rate": 9.95789261639523e-05, "loss": 4.0114, "step": 1090 }, { "epoch": 0.08953112990007686, "grad_norm": 1.0715919733047485, "learning_rate": 9.957540754433894e-05, "loss": 3.9759, "step": 1092 }, { "epoch": 0.08969510632846528, "grad_norm": 0.9044798612594604, "learning_rate": 9.957187434719352e-05, "loss": 3.9858, "step": 1094 }, { "epoch": 0.0898590827568537, "grad_norm": 0.8711757659912109, "learning_rate": 9.956832657355497e-05, "loss": 3.9678, "step": 1096 }, { "epoch": 0.09002305918524212, "grad_norm": 1.049402117729187, "learning_rate": 9.956476422446652e-05, "loss": 3.8973, "step": 1098 }, { "epoch": 0.09018703561363053, "grad_norm": 0.9575179219245911, "learning_rate": 9.956118730097564e-05, "loss": 3.964, "step": 1100 }, { "epoch": 0.09035101204201897, "grad_norm": 1.0238109827041626, "learning_rate": 9.955759580413412e-05, "loss": 3.9312, "step": 1102 }, { "epoch": 0.09051498847040738, "grad_norm": 0.9079989790916443, "learning_rate": 9.955398973499805e-05, "loss": 3.9918, "step": 1104 }, { "epoch": 0.0906789648987958, "grad_norm": 0.9520390033721924, "learning_rate": 9.955036909462777e-05, "loss": 3.9605, "step": 1106 }, { "epoch": 0.09084294132718422, "grad_norm": 0.9960986971855164, "learning_rate": 9.954673388408793e-05, "loss": 3.9898, "step": 1108 }, { "epoch": 0.09100691775557264, "grad_norm": 0.9239450097084045, "learning_rate": 9.954308410444747e-05, "loss": 3.9124, "step": 1110 }, { "epoch": 0.09117089418396106, "grad_norm": 0.8150608539581299, "learning_rate": 9.953941975677954e-05, "loss": 4.0019, "step": 1112 }, { "epoch": 0.09133487061234948, "grad_norm": 0.8617908358573914, "learning_rate": 9.953574084216171e-05, "loss": 3.9295, "step": 1114 }, { "epoch": 0.0914988470407379, "grad_norm": 0.9470566511154175, "learning_rate": 9.953204736167569e-05, "loss": 3.9361, "step": 1116 }, { "epoch": 0.09166282346912631, "grad_norm": 0.8053050637245178, "learning_rate": 9.95283393164076e-05, "loss": 3.952, "step": 1118 }, { "epoch": 0.09182679989751473, "grad_norm": 0.8299336433410645, "learning_rate": 9.952461670744774e-05, "loss": 3.9024, "step": 1120 }, { "epoch": 0.09199077632590315, "grad_norm": 0.8287034630775452, "learning_rate": 9.952087953589073e-05, "loss": 3.8938, "step": 1122 }, { "epoch": 0.09215475275429157, "grad_norm": 0.8874202370643616, "learning_rate": 9.951712780283552e-05, "loss": 3.9419, "step": 1124 }, { "epoch": 0.09231872918267998, "grad_norm": 0.855707585811615, "learning_rate": 9.951336150938526e-05, "loss": 3.876, "step": 1126 }, { "epoch": 0.0924827056110684, "grad_norm": 0.7967925667762756, "learning_rate": 9.950958065664741e-05, "loss": 3.9378, "step": 1128 }, { "epoch": 0.09264668203945683, "grad_norm": 0.7915927171707153, "learning_rate": 9.950578524573377e-05, "loss": 3.8823, "step": 1130 }, { "epoch": 0.09281065846784525, "grad_norm": 0.8065016865730286, "learning_rate": 9.950197527776033e-05, "loss": 3.9223, "step": 1132 }, { "epoch": 0.09297463489623367, "grad_norm": 0.7818952202796936, "learning_rate": 9.949815075384742e-05, "loss": 3.9015, "step": 1134 }, { "epoch": 0.09313861132462209, "grad_norm": 0.9576020240783691, "learning_rate": 9.949431167511963e-05, "loss": 3.9206, "step": 1136 }, { "epoch": 0.09330258775301051, "grad_norm": 0.8579282760620117, "learning_rate": 9.949045804270581e-05, "loss": 3.9195, "step": 1138 }, { "epoch": 0.09346656418139893, "grad_norm": 0.7089054584503174, "learning_rate": 9.948658985773915e-05, "loss": 3.8824, "step": 1140 }, { "epoch": 0.09363054060978734, "grad_norm": 0.7162330150604248, "learning_rate": 9.948270712135705e-05, "loss": 3.8758, "step": 1142 }, { "epoch": 0.09379451703817576, "grad_norm": 1.0738468170166016, "learning_rate": 9.947880983470124e-05, "loss": 3.8408, "step": 1144 }, { "epoch": 0.09395849346656418, "grad_norm": 0.8277477025985718, "learning_rate": 9.947489799891769e-05, "loss": 3.87, "step": 1146 }, { "epoch": 0.0941224698949526, "grad_norm": 0.7950448989868164, "learning_rate": 9.947097161515668e-05, "loss": 3.9011, "step": 1148 }, { "epoch": 0.09428644632334102, "grad_norm": 0.9803164601325989, "learning_rate": 9.946703068457275e-05, "loss": 3.8423, "step": 1150 }, { "epoch": 0.09445042275172943, "grad_norm": 0.9193939566612244, "learning_rate": 9.946307520832472e-05, "loss": 3.892, "step": 1152 }, { "epoch": 0.09461439918011785, "grad_norm": 0.8781881928443909, "learning_rate": 9.94591051875757e-05, "loss": 3.8793, "step": 1154 }, { "epoch": 0.09477837560850627, "grad_norm": 0.7899143695831299, "learning_rate": 9.945512062349304e-05, "loss": 3.8543, "step": 1156 }, { "epoch": 0.0949423520368947, "grad_norm": 0.9870477914810181, "learning_rate": 9.94511215172484e-05, "loss": 3.8322, "step": 1158 }, { "epoch": 0.09510632846528312, "grad_norm": 1.0156104564666748, "learning_rate": 9.944710787001773e-05, "loss": 3.8877, "step": 1160 }, { "epoch": 0.09527030489367154, "grad_norm": 0.9456477165222168, "learning_rate": 9.94430796829812e-05, "loss": 3.873, "step": 1162 }, { "epoch": 0.09543428132205996, "grad_norm": 0.808631956577301, "learning_rate": 9.943903695732333e-05, "loss": 3.8156, "step": 1164 }, { "epoch": 0.09559825775044838, "grad_norm": 0.9766041040420532, "learning_rate": 9.943497969423283e-05, "loss": 3.8912, "step": 1166 }, { "epoch": 0.0957622341788368, "grad_norm": 1.068718671798706, "learning_rate": 9.943090789490276e-05, "loss": 3.8365, "step": 1168 }, { "epoch": 0.09592621060722521, "grad_norm": 0.8382964134216309, "learning_rate": 9.94268215605304e-05, "loss": 3.8484, "step": 1170 }, { "epoch": 0.09609018703561363, "grad_norm": 0.9153487086296082, "learning_rate": 9.942272069231735e-05, "loss": 3.8154, "step": 1172 }, { "epoch": 0.09625416346400205, "grad_norm": 0.8782140016555786, "learning_rate": 9.941860529146944e-05, "loss": 3.8068, "step": 1174 }, { "epoch": 0.09641813989239047, "grad_norm": 1.0472065210342407, "learning_rate": 9.941447535919681e-05, "loss": 3.887, "step": 1176 }, { "epoch": 0.09658211632077889, "grad_norm": 0.8168578743934631, "learning_rate": 9.941033089671385e-05, "loss": 3.8221, "step": 1178 }, { "epoch": 0.0967460927491673, "grad_norm": 0.8000882863998413, "learning_rate": 9.940617190523923e-05, "loss": 3.8425, "step": 1180 }, { "epoch": 0.09691006917755572, "grad_norm": 0.9577187895774841, "learning_rate": 9.940199838599588e-05, "loss": 3.8679, "step": 1182 }, { "epoch": 0.09707404560594414, "grad_norm": 0.8853087425231934, "learning_rate": 9.939781034021105e-05, "loss": 3.8584, "step": 1184 }, { "epoch": 0.09723802203433256, "grad_norm": 0.811404824256897, "learning_rate": 9.939360776911619e-05, "loss": 3.8383, "step": 1186 }, { "epoch": 0.09740199846272099, "grad_norm": 0.811406672000885, "learning_rate": 9.938939067394706e-05, "loss": 3.7581, "step": 1188 }, { "epoch": 0.09756597489110941, "grad_norm": 0.8467538356781006, "learning_rate": 9.93851590559437e-05, "loss": 3.8692, "step": 1190 }, { "epoch": 0.09772995131949783, "grad_norm": 0.8470588326454163, "learning_rate": 9.938091291635039e-05, "loss": 3.8054, "step": 1192 }, { "epoch": 0.09789392774788624, "grad_norm": 0.8332253694534302, "learning_rate": 9.93766522564157e-05, "loss": 3.8541, "step": 1194 }, { "epoch": 0.09805790417627466, "grad_norm": 0.7565471529960632, "learning_rate": 9.93723770773925e-05, "loss": 3.7615, "step": 1196 }, { "epoch": 0.09822188060466308, "grad_norm": 0.9599220156669617, "learning_rate": 9.936808738053785e-05, "loss": 3.8253, "step": 1198 }, { "epoch": 0.0983858570330515, "grad_norm": 0.7333558201789856, "learning_rate": 9.936378316711317e-05, "loss": 3.8042, "step": 1200 }, { "epoch": 0.09854983346143992, "grad_norm": 0.7844712138175964, "learning_rate": 9.935946443838407e-05, "loss": 3.8675, "step": 1202 }, { "epoch": 0.09871380988982834, "grad_norm": 0.9951752424240112, "learning_rate": 9.935513119562045e-05, "loss": 3.8046, "step": 1204 }, { "epoch": 0.09887778631821675, "grad_norm": 0.8403246998786926, "learning_rate": 9.935078344009654e-05, "loss": 3.8651, "step": 1206 }, { "epoch": 0.09904176274660517, "grad_norm": 0.9809087514877319, "learning_rate": 9.934642117309074e-05, "loss": 3.7967, "step": 1208 }, { "epoch": 0.09920573917499359, "grad_norm": 1.024038553237915, "learning_rate": 9.93420443958858e-05, "loss": 3.798, "step": 1210 }, { "epoch": 0.09936971560338201, "grad_norm": 0.8824047446250916, "learning_rate": 9.933765310976867e-05, "loss": 3.7725, "step": 1212 }, { "epoch": 0.09953369203177043, "grad_norm": 0.7645026445388794, "learning_rate": 9.933324731603063e-05, "loss": 3.7802, "step": 1214 }, { "epoch": 0.09969766846015886, "grad_norm": 0.7119176387786865, "learning_rate": 9.932882701596716e-05, "loss": 3.7905, "step": 1216 }, { "epoch": 0.09986164488854728, "grad_norm": 0.6710290908813477, "learning_rate": 9.932439221087806e-05, "loss": 3.7898, "step": 1218 }, { "epoch": 0.1000256213169357, "grad_norm": 0.8256493210792542, "learning_rate": 9.931994290206738e-05, "loss": 3.7857, "step": 1220 }, { "epoch": 0.10018959774532411, "grad_norm": 0.8101679086685181, "learning_rate": 9.931547909084339e-05, "loss": 3.806, "step": 1222 }, { "epoch": 0.10035357417371253, "grad_norm": 0.7876362204551697, "learning_rate": 9.931100077851871e-05, "loss": 3.7395, "step": 1224 }, { "epoch": 0.10051755060210095, "grad_norm": 0.7746016979217529, "learning_rate": 9.930650796641017e-05, "loss": 3.7961, "step": 1226 }, { "epoch": 0.10068152703048937, "grad_norm": 0.6673750877380371, "learning_rate": 9.930200065583883e-05, "loss": 3.7708, "step": 1228 }, { "epoch": 0.10084550345887779, "grad_norm": 0.796775221824646, "learning_rate": 9.92974788481301e-05, "loss": 3.8279, "step": 1230 }, { "epoch": 0.1010094798872662, "grad_norm": 0.725659191608429, "learning_rate": 9.929294254461359e-05, "loss": 3.7343, "step": 1232 }, { "epoch": 0.10117345631565462, "grad_norm": 0.7385995984077454, "learning_rate": 9.928839174662317e-05, "loss": 3.8351, "step": 1234 }, { "epoch": 0.10133743274404304, "grad_norm": 0.7573429346084595, "learning_rate": 9.928382645549703e-05, "loss": 3.8307, "step": 1236 }, { "epoch": 0.10150140917243146, "grad_norm": 0.7082958221435547, "learning_rate": 9.927924667257756e-05, "loss": 3.812, "step": 1238 }, { "epoch": 0.10166538560081988, "grad_norm": 0.819148063659668, "learning_rate": 9.927465239921143e-05, "loss": 3.8176, "step": 1240 }, { "epoch": 0.1018293620292083, "grad_norm": 0.8235107660293579, "learning_rate": 9.927004363674959e-05, "loss": 3.766, "step": 1242 }, { "epoch": 0.10199333845759673, "grad_norm": 0.8283859491348267, "learning_rate": 9.926542038654722e-05, "loss": 3.7771, "step": 1244 }, { "epoch": 0.10215731488598515, "grad_norm": 0.739612340927124, "learning_rate": 9.92607826499638e-05, "loss": 3.7786, "step": 1246 }, { "epoch": 0.10232129131437356, "grad_norm": 0.6946161985397339, "learning_rate": 9.925613042836302e-05, "loss": 3.7143, "step": 1248 }, { "epoch": 0.10248526774276198, "grad_norm": 0.8461303114891052, "learning_rate": 9.925146372311288e-05, "loss": 3.7804, "step": 1250 }, { "epoch": 0.1026492441711504, "grad_norm": 0.8450109958648682, "learning_rate": 9.924678253558557e-05, "loss": 3.7209, "step": 1252 }, { "epoch": 0.10281322059953882, "grad_norm": 0.7622053623199463, "learning_rate": 9.924208686715763e-05, "loss": 3.7491, "step": 1254 }, { "epoch": 0.10297719702792724, "grad_norm": 0.8452515602111816, "learning_rate": 9.923737671920978e-05, "loss": 3.7461, "step": 1256 }, { "epoch": 0.10314117345631565, "grad_norm": 0.7765418887138367, "learning_rate": 9.923265209312704e-05, "loss": 3.7491, "step": 1258 }, { "epoch": 0.10330514988470407, "grad_norm": 0.8474555015563965, "learning_rate": 9.922791299029868e-05, "loss": 3.7277, "step": 1260 }, { "epoch": 0.10346912631309249, "grad_norm": 0.9031925201416016, "learning_rate": 9.922315941211823e-05, "loss": 3.7966, "step": 1262 }, { "epoch": 0.10363310274148091, "grad_norm": 0.7896429300308228, "learning_rate": 9.921839135998343e-05, "loss": 3.7119, "step": 1264 }, { "epoch": 0.10379707916986933, "grad_norm": 0.8127464056015015, "learning_rate": 9.921360883529636e-05, "loss": 3.7126, "step": 1266 }, { "epoch": 0.10396105559825775, "grad_norm": 0.9778748750686646, "learning_rate": 9.920881183946328e-05, "loss": 3.786, "step": 1268 }, { "epoch": 0.10412503202664616, "grad_norm": 0.9540830254554749, "learning_rate": 9.920400037389474e-05, "loss": 3.7243, "step": 1270 }, { "epoch": 0.10428900845503458, "grad_norm": 1.0872488021850586, "learning_rate": 9.919917444000555e-05, "loss": 3.7761, "step": 1272 }, { "epoch": 0.10445298488342301, "grad_norm": 0.8405986428260803, "learning_rate": 9.919433403921476e-05, "loss": 3.7128, "step": 1274 }, { "epoch": 0.10461696131181143, "grad_norm": 0.9105572700500488, "learning_rate": 9.918947917294568e-05, "loss": 3.7153, "step": 1276 }, { "epoch": 0.10478093774019985, "grad_norm": 0.7782844305038452, "learning_rate": 9.918460984262588e-05, "loss": 3.6612, "step": 1278 }, { "epoch": 0.10494491416858827, "grad_norm": 0.746457576751709, "learning_rate": 9.917972604968715e-05, "loss": 3.7196, "step": 1280 }, { "epoch": 0.10510889059697669, "grad_norm": 0.856855034828186, "learning_rate": 9.917482779556557e-05, "loss": 3.6806, "step": 1282 }, { "epoch": 0.1052728670253651, "grad_norm": 0.6793504953384399, "learning_rate": 9.916991508170148e-05, "loss": 3.706, "step": 1284 }, { "epoch": 0.10543684345375352, "grad_norm": 0.9537250995635986, "learning_rate": 9.916498790953943e-05, "loss": 3.7844, "step": 1286 }, { "epoch": 0.10560081988214194, "grad_norm": 0.6688050627708435, "learning_rate": 9.916004628052824e-05, "loss": 3.7161, "step": 1288 }, { "epoch": 0.10576479631053036, "grad_norm": 0.7840797305107117, "learning_rate": 9.9155090196121e-05, "loss": 3.7309, "step": 1290 }, { "epoch": 0.10592877273891878, "grad_norm": 0.7196126580238342, "learning_rate": 9.9150119657775e-05, "loss": 3.7571, "step": 1292 }, { "epoch": 0.1060927491673072, "grad_norm": 0.8072746396064758, "learning_rate": 9.914513466695188e-05, "loss": 3.713, "step": 1294 }, { "epoch": 0.10625672559569561, "grad_norm": 0.7342846393585205, "learning_rate": 9.914013522511743e-05, "loss": 3.7195, "step": 1296 }, { "epoch": 0.10642070202408403, "grad_norm": 0.7047367691993713, "learning_rate": 9.91351213337417e-05, "loss": 3.7306, "step": 1298 }, { "epoch": 0.10658467845247245, "grad_norm": 0.6987332105636597, "learning_rate": 9.913009299429904e-05, "loss": 3.7034, "step": 1300 }, { "epoch": 0.10674865488086088, "grad_norm": 0.6787108778953552, "learning_rate": 9.912505020826801e-05, "loss": 3.7059, "step": 1302 }, { "epoch": 0.1069126313092493, "grad_norm": 0.6666189432144165, "learning_rate": 9.911999297713145e-05, "loss": 3.7006, "step": 1304 }, { "epoch": 0.10707660773763772, "grad_norm": 0.6904592514038086, "learning_rate": 9.91149213023764e-05, "loss": 3.762, "step": 1306 }, { "epoch": 0.10724058416602614, "grad_norm": 0.8941283822059631, "learning_rate": 9.91098351854942e-05, "loss": 3.6793, "step": 1308 }, { "epoch": 0.10740456059441456, "grad_norm": 0.7099062204360962, "learning_rate": 9.910473462798039e-05, "loss": 3.6232, "step": 1310 }, { "epoch": 0.10756853702280297, "grad_norm": 0.8660025596618652, "learning_rate": 9.909961963133479e-05, "loss": 3.7272, "step": 1312 }, { "epoch": 0.10773251345119139, "grad_norm": 0.7533067464828491, "learning_rate": 9.909449019706145e-05, "loss": 3.7422, "step": 1314 }, { "epoch": 0.10789648987957981, "grad_norm": 0.7809666991233826, "learning_rate": 9.908934632666864e-05, "loss": 3.6608, "step": 1316 }, { "epoch": 0.10806046630796823, "grad_norm": 0.7331179976463318, "learning_rate": 9.908418802166894e-05, "loss": 3.6718, "step": 1318 }, { "epoch": 0.10822444273635665, "grad_norm": 0.7965632081031799, "learning_rate": 9.907901528357915e-05, "loss": 3.7616, "step": 1320 }, { "epoch": 0.10838841916474506, "grad_norm": 0.7728394269943237, "learning_rate": 9.907382811392026e-05, "loss": 3.6811, "step": 1322 }, { "epoch": 0.10855239559313348, "grad_norm": 0.7595298290252686, "learning_rate": 9.906862651421756e-05, "loss": 3.7385, "step": 1324 }, { "epoch": 0.1087163720215219, "grad_norm": 0.8519642353057861, "learning_rate": 9.906341048600056e-05, "loss": 3.7245, "step": 1326 }, { "epoch": 0.10888034844991032, "grad_norm": 0.7890890836715698, "learning_rate": 9.905818003080305e-05, "loss": 3.7362, "step": 1328 }, { "epoch": 0.10904432487829875, "grad_norm": 0.784578800201416, "learning_rate": 9.9052935150163e-05, "loss": 3.6611, "step": 1330 }, { "epoch": 0.10920830130668717, "grad_norm": 0.8048536777496338, "learning_rate": 9.904767584562267e-05, "loss": 3.7034, "step": 1332 }, { "epoch": 0.10937227773507559, "grad_norm": 0.7695967555046082, "learning_rate": 9.904240211872855e-05, "loss": 3.6495, "step": 1334 }, { "epoch": 0.109536254163464, "grad_norm": 0.6730368733406067, "learning_rate": 9.903711397103136e-05, "loss": 3.6522, "step": 1336 }, { "epoch": 0.10970023059185242, "grad_norm": 0.7607198357582092, "learning_rate": 9.903181140408609e-05, "loss": 3.6837, "step": 1338 }, { "epoch": 0.10986420702024084, "grad_norm": 0.7482820749282837, "learning_rate": 9.902649441945188e-05, "loss": 3.6851, "step": 1340 }, { "epoch": 0.11002818344862926, "grad_norm": 0.7840356230735779, "learning_rate": 9.902116301869227e-05, "loss": 3.6291, "step": 1342 }, { "epoch": 0.11019215987701768, "grad_norm": 0.6231241822242737, "learning_rate": 9.901581720337488e-05, "loss": 3.6361, "step": 1344 }, { "epoch": 0.1103561363054061, "grad_norm": 0.7832990884780884, "learning_rate": 9.901045697507165e-05, "loss": 3.6948, "step": 1346 }, { "epoch": 0.11052011273379451, "grad_norm": 0.7512111067771912, "learning_rate": 9.900508233535875e-05, "loss": 3.6697, "step": 1348 }, { "epoch": 0.11068408916218293, "grad_norm": 0.7433375716209412, "learning_rate": 9.899969328581659e-05, "loss": 3.7029, "step": 1350 }, { "epoch": 0.11084806559057135, "grad_norm": 0.7757459878921509, "learning_rate": 9.899428982802979e-05, "loss": 3.6965, "step": 1352 }, { "epoch": 0.11101204201895977, "grad_norm": 0.7528740763664246, "learning_rate": 9.898887196358721e-05, "loss": 3.6376, "step": 1354 }, { "epoch": 0.11117601844734819, "grad_norm": 0.7715753316879272, "learning_rate": 9.898343969408199e-05, "loss": 3.6403, "step": 1356 }, { "epoch": 0.11133999487573662, "grad_norm": 0.8425229787826538, "learning_rate": 9.897799302111146e-05, "loss": 3.6655, "step": 1358 }, { "epoch": 0.11150397130412504, "grad_norm": 0.8818288445472717, "learning_rate": 9.897253194627722e-05, "loss": 3.6635, "step": 1360 }, { "epoch": 0.11166794773251346, "grad_norm": 0.9619779586791992, "learning_rate": 9.896705647118504e-05, "loss": 3.6766, "step": 1362 }, { "epoch": 0.11183192416090187, "grad_norm": 0.9253937005996704, "learning_rate": 9.896156659744504e-05, "loss": 3.6859, "step": 1364 }, { "epoch": 0.11199590058929029, "grad_norm": 0.9797042608261108, "learning_rate": 9.895606232667144e-05, "loss": 3.649, "step": 1366 }, { "epoch": 0.11215987701767871, "grad_norm": 0.7821505665779114, "learning_rate": 9.895054366048281e-05, "loss": 3.7164, "step": 1368 }, { "epoch": 0.11232385344606713, "grad_norm": 0.8402411937713623, "learning_rate": 9.894501060050186e-05, "loss": 3.7369, "step": 1370 }, { "epoch": 0.11248782987445555, "grad_norm": 0.9361245036125183, "learning_rate": 9.893946314835559e-05, "loss": 3.6806, "step": 1372 }, { "epoch": 0.11265180630284397, "grad_norm": 0.9412410855293274, "learning_rate": 9.893390130567523e-05, "loss": 3.676, "step": 1374 }, { "epoch": 0.11281578273123238, "grad_norm": 0.8595388531684875, "learning_rate": 9.89283250740962e-05, "loss": 3.6322, "step": 1376 }, { "epoch": 0.1129797591596208, "grad_norm": 0.7236664295196533, "learning_rate": 9.892273445525817e-05, "loss": 3.6686, "step": 1378 }, { "epoch": 0.11314373558800922, "grad_norm": 0.659474790096283, "learning_rate": 9.891712945080508e-05, "loss": 3.6125, "step": 1380 }, { "epoch": 0.11330771201639764, "grad_norm": 0.7330523133277893, "learning_rate": 9.891151006238507e-05, "loss": 3.6654, "step": 1382 }, { "epoch": 0.11347168844478606, "grad_norm": 0.8555669784545898, "learning_rate": 9.890587629165049e-05, "loss": 3.5857, "step": 1384 }, { "epoch": 0.11363566487317447, "grad_norm": 0.9295856952667236, "learning_rate": 9.890022814025792e-05, "loss": 3.6874, "step": 1386 }, { "epoch": 0.1137996413015629, "grad_norm": 0.63069087266922, "learning_rate": 9.889456560986823e-05, "loss": 3.6358, "step": 1388 }, { "epoch": 0.11396361772995132, "grad_norm": 0.7602105140686035, "learning_rate": 9.888888870214643e-05, "loss": 3.6113, "step": 1390 }, { "epoch": 0.11412759415833974, "grad_norm": 0.7809365391731262, "learning_rate": 9.888319741876185e-05, "loss": 3.6585, "step": 1392 }, { "epoch": 0.11429157058672816, "grad_norm": 0.6976439952850342, "learning_rate": 9.887749176138794e-05, "loss": 3.695, "step": 1394 }, { "epoch": 0.11445554701511658, "grad_norm": 0.775341272354126, "learning_rate": 9.887177173170248e-05, "loss": 3.6783, "step": 1396 }, { "epoch": 0.114619523443505, "grad_norm": 0.7208604216575623, "learning_rate": 9.886603733138742e-05, "loss": 3.692, "step": 1398 }, { "epoch": 0.11478349987189342, "grad_norm": 0.7146006226539612, "learning_rate": 9.886028856212893e-05, "loss": 3.6103, "step": 1400 }, { "epoch": 0.11494747630028183, "grad_norm": 0.6759282946586609, "learning_rate": 9.885452542561744e-05, "loss": 3.7273, "step": 1402 }, { "epoch": 0.11511145272867025, "grad_norm": 0.680182158946991, "learning_rate": 9.884874792354758e-05, "loss": 3.6314, "step": 1404 }, { "epoch": 0.11527542915705867, "grad_norm": 0.8232313394546509, "learning_rate": 9.884295605761822e-05, "loss": 3.6098, "step": 1406 }, { "epoch": 0.11543940558544709, "grad_norm": 0.6593087911605835, "learning_rate": 9.883714982953244e-05, "loss": 3.5716, "step": 1408 }, { "epoch": 0.1156033820138355, "grad_norm": 0.6459859013557434, "learning_rate": 9.883132924099753e-05, "loss": 3.6418, "step": 1410 }, { "epoch": 0.11576735844222392, "grad_norm": 0.7395800352096558, "learning_rate": 9.882549429372505e-05, "loss": 3.6148, "step": 1412 }, { "epoch": 0.11593133487061234, "grad_norm": 0.6539946794509888, "learning_rate": 9.881964498943074e-05, "loss": 3.6126, "step": 1414 }, { "epoch": 0.11609531129900078, "grad_norm": 0.7250804901123047, "learning_rate": 9.881378132983456e-05, "loss": 3.5968, "step": 1416 }, { "epoch": 0.1162592877273892, "grad_norm": 0.6665759086608887, "learning_rate": 9.880790331666073e-05, "loss": 3.5718, "step": 1418 }, { "epoch": 0.11642326415577761, "grad_norm": 0.6589260697364807, "learning_rate": 9.880201095163765e-05, "loss": 3.6868, "step": 1420 }, { "epoch": 0.11658724058416603, "grad_norm": 0.7453758716583252, "learning_rate": 9.879610423649795e-05, "loss": 3.5785, "step": 1422 }, { "epoch": 0.11675121701255445, "grad_norm": 0.760637640953064, "learning_rate": 9.879018317297852e-05, "loss": 3.6157, "step": 1424 }, { "epoch": 0.11691519344094287, "grad_norm": 0.7891120910644531, "learning_rate": 9.878424776282039e-05, "loss": 3.6347, "step": 1426 }, { "epoch": 0.11707916986933128, "grad_norm": 0.8159520626068115, "learning_rate": 9.877829800776887e-05, "loss": 3.5827, "step": 1428 }, { "epoch": 0.1172431462977197, "grad_norm": 0.7945658564567566, "learning_rate": 9.877233390957348e-05, "loss": 3.6225, "step": 1430 }, { "epoch": 0.11740712272610812, "grad_norm": 0.6557226777076721, "learning_rate": 9.876635546998795e-05, "loss": 3.5789, "step": 1432 }, { "epoch": 0.11757109915449654, "grad_norm": 0.7214797735214233, "learning_rate": 9.876036269077021e-05, "loss": 3.6572, "step": 1434 }, { "epoch": 0.11773507558288496, "grad_norm": 0.6005678772926331, "learning_rate": 9.875435557368245e-05, "loss": 3.5788, "step": 1436 }, { "epoch": 0.11789905201127338, "grad_norm": 0.7711865901947021, "learning_rate": 9.874833412049102e-05, "loss": 3.6336, "step": 1438 }, { "epoch": 0.1180630284396618, "grad_norm": 0.6728807091712952, "learning_rate": 9.874229833296654e-05, "loss": 3.5508, "step": 1440 }, { "epoch": 0.11822700486805021, "grad_norm": 0.7253196239471436, "learning_rate": 9.873624821288378e-05, "loss": 3.5958, "step": 1442 }, { "epoch": 0.11839098129643864, "grad_norm": 0.7579479813575745, "learning_rate": 9.87301837620218e-05, "loss": 3.5491, "step": 1444 }, { "epoch": 0.11855495772482706, "grad_norm": 0.8953156471252441, "learning_rate": 9.872410498216382e-05, "loss": 3.5745, "step": 1446 }, { "epoch": 0.11871893415321548, "grad_norm": 0.7993676662445068, "learning_rate": 9.87180118750973e-05, "loss": 3.6284, "step": 1448 }, { "epoch": 0.1188829105816039, "grad_norm": 0.7717795968055725, "learning_rate": 9.871190444261391e-05, "loss": 3.6401, "step": 1450 }, { "epoch": 0.11904688700999232, "grad_norm": 0.7042213082313538, "learning_rate": 9.870578268650951e-05, "loss": 3.5621, "step": 1452 }, { "epoch": 0.11921086343838073, "grad_norm": 0.7422952651977539, "learning_rate": 9.86996466085842e-05, "loss": 3.6372, "step": 1454 }, { "epoch": 0.11937483986676915, "grad_norm": 0.6974375247955322, "learning_rate": 9.869349621064228e-05, "loss": 3.6112, "step": 1456 }, { "epoch": 0.11953881629515757, "grad_norm": 0.6447159051895142, "learning_rate": 9.868733149449224e-05, "loss": 3.6014, "step": 1458 }, { "epoch": 0.11970279272354599, "grad_norm": 0.7838313579559326, "learning_rate": 9.868115246194682e-05, "loss": 3.6063, "step": 1460 }, { "epoch": 0.11986676915193441, "grad_norm": 0.7431493997573853, "learning_rate": 9.867495911482295e-05, "loss": 3.6058, "step": 1462 }, { "epoch": 0.12003074558032283, "grad_norm": 0.7936450242996216, "learning_rate": 9.866875145494175e-05, "loss": 3.5997, "step": 1464 }, { "epoch": 0.12019472200871124, "grad_norm": 0.7873966097831726, "learning_rate": 9.866252948412859e-05, "loss": 3.5496, "step": 1466 }, { "epoch": 0.12035869843709966, "grad_norm": 0.6305302381515503, "learning_rate": 9.865629320421301e-05, "loss": 3.5844, "step": 1468 }, { "epoch": 0.12052267486548808, "grad_norm": 0.8464959263801575, "learning_rate": 9.865004261702879e-05, "loss": 3.5736, "step": 1470 }, { "epoch": 0.1206866512938765, "grad_norm": 0.5869840383529663, "learning_rate": 9.86437777244139e-05, "loss": 3.5445, "step": 1472 }, { "epoch": 0.12085062772226493, "grad_norm": 0.680647611618042, "learning_rate": 9.863749852821049e-05, "loss": 3.5203, "step": 1474 }, { "epoch": 0.12101460415065335, "grad_norm": 0.61550372838974, "learning_rate": 9.863120503026497e-05, "loss": 3.5638, "step": 1476 }, { "epoch": 0.12117858057904177, "grad_norm": 0.7552183270454407, "learning_rate": 9.862489723242792e-05, "loss": 3.5792, "step": 1478 }, { "epoch": 0.12134255700743019, "grad_norm": 0.6832086443901062, "learning_rate": 9.861857513655413e-05, "loss": 3.5966, "step": 1480 }, { "epoch": 0.1215065334358186, "grad_norm": 0.7540295124053955, "learning_rate": 9.86122387445026e-05, "loss": 3.6294, "step": 1482 }, { "epoch": 0.12167050986420702, "grad_norm": 0.8079466223716736, "learning_rate": 9.860588805813653e-05, "loss": 3.5425, "step": 1484 }, { "epoch": 0.12183448629259544, "grad_norm": 0.6720893979072571, "learning_rate": 9.859952307932334e-05, "loss": 3.5946, "step": 1486 }, { "epoch": 0.12199846272098386, "grad_norm": 0.7064858078956604, "learning_rate": 9.85931438099346e-05, "loss": 3.5714, "step": 1488 }, { "epoch": 0.12216243914937228, "grad_norm": 0.7637129426002502, "learning_rate": 9.858675025184616e-05, "loss": 3.5547, "step": 1490 }, { "epoch": 0.1223264155777607, "grad_norm": 0.7691531181335449, "learning_rate": 9.8580342406938e-05, "loss": 3.5592, "step": 1492 }, { "epoch": 0.12249039200614911, "grad_norm": 0.8551957607269287, "learning_rate": 9.857392027709435e-05, "loss": 3.5714, "step": 1494 }, { "epoch": 0.12265436843453753, "grad_norm": 0.792309582233429, "learning_rate": 9.856748386420362e-05, "loss": 3.6033, "step": 1496 }, { "epoch": 0.12281834486292595, "grad_norm": 0.6698387861251831, "learning_rate": 9.856103317015841e-05, "loss": 3.5304, "step": 1498 }, { "epoch": 0.12298232129131437, "grad_norm": 0.7881389260292053, "learning_rate": 9.855456819685555e-05, "loss": 3.551, "step": 1500 }, { "epoch": 0.1231462977197028, "grad_norm": 0.7923277616500854, "learning_rate": 9.854808894619602e-05, "loss": 3.5718, "step": 1502 }, { "epoch": 0.12331027414809122, "grad_norm": 0.8148519992828369, "learning_rate": 9.854159542008508e-05, "loss": 3.5866, "step": 1504 }, { "epoch": 0.12347425057647964, "grad_norm": 0.7714492082595825, "learning_rate": 9.853508762043209e-05, "loss": 3.6145, "step": 1506 }, { "epoch": 0.12363822700486805, "grad_norm": 0.8480067253112793, "learning_rate": 9.852856554915066e-05, "loss": 3.5683, "step": 1508 }, { "epoch": 0.12380220343325647, "grad_norm": 0.6881988644599915, "learning_rate": 9.85220292081586e-05, "loss": 3.5086, "step": 1510 }, { "epoch": 0.12396617986164489, "grad_norm": 0.7517293095588684, "learning_rate": 9.85154785993779e-05, "loss": 3.582, "step": 1512 }, { "epoch": 0.12413015629003331, "grad_norm": 1.0973633527755737, "learning_rate": 9.850891372473478e-05, "loss": 3.5674, "step": 1514 }, { "epoch": 0.12429413271842173, "grad_norm": 0.837307870388031, "learning_rate": 9.850233458615957e-05, "loss": 3.6073, "step": 1516 }, { "epoch": 0.12445810914681014, "grad_norm": 0.9713445901870728, "learning_rate": 9.84957411855869e-05, "loss": 3.5698, "step": 1518 }, { "epoch": 0.12462208557519856, "grad_norm": 0.9163139462471008, "learning_rate": 9.848913352495551e-05, "loss": 3.6004, "step": 1520 }, { "epoch": 0.12478606200358698, "grad_norm": 0.7779731154441833, "learning_rate": 9.848251160620839e-05, "loss": 3.6013, "step": 1522 }, { "epoch": 0.1249500384319754, "grad_norm": 0.7217362523078918, "learning_rate": 9.847587543129269e-05, "loss": 3.5181, "step": 1524 }, { "epoch": 0.12511401486036383, "grad_norm": 0.7261420488357544, "learning_rate": 9.846922500215976e-05, "loss": 3.5826, "step": 1526 }, { "epoch": 0.12527799128875225, "grad_norm": 0.6862401962280273, "learning_rate": 9.846256032076515e-05, "loss": 3.4789, "step": 1528 }, { "epoch": 0.12544196771714067, "grad_norm": 0.7996855974197388, "learning_rate": 9.845588138906859e-05, "loss": 3.5581, "step": 1530 }, { "epoch": 0.12560594414552909, "grad_norm": 0.6853513717651367, "learning_rate": 9.8449188209034e-05, "loss": 3.5594, "step": 1532 }, { "epoch": 0.1257699205739175, "grad_norm": 0.7655189037322998, "learning_rate": 9.84424807826295e-05, "loss": 3.5514, "step": 1534 }, { "epoch": 0.12593389700230592, "grad_norm": 0.6501437425613403, "learning_rate": 9.84357591118274e-05, "loss": 3.5318, "step": 1536 }, { "epoch": 0.12609787343069434, "grad_norm": 0.7742712497711182, "learning_rate": 9.842902319860417e-05, "loss": 3.505, "step": 1538 }, { "epoch": 0.12626184985908276, "grad_norm": 0.632228672504425, "learning_rate": 9.842227304494051e-05, "loss": 3.6209, "step": 1540 }, { "epoch": 0.12642582628747118, "grad_norm": 0.5981665253639221, "learning_rate": 9.841550865282128e-05, "loss": 3.5373, "step": 1542 }, { "epoch": 0.1265898027158596, "grad_norm": 0.6225053071975708, "learning_rate": 9.840873002423552e-05, "loss": 3.5161, "step": 1544 }, { "epoch": 0.126753779144248, "grad_norm": 0.6428139209747314, "learning_rate": 9.84019371611765e-05, "loss": 3.5194, "step": 1546 }, { "epoch": 0.12691775557263643, "grad_norm": 0.6379141807556152, "learning_rate": 9.83951300656416e-05, "loss": 3.5363, "step": 1548 }, { "epoch": 0.12708173200102485, "grad_norm": 0.6773483753204346, "learning_rate": 9.838830873963249e-05, "loss": 3.5162, "step": 1550 }, { "epoch": 0.12724570842941327, "grad_norm": 0.6917803883552551, "learning_rate": 9.838147318515491e-05, "loss": 3.5388, "step": 1552 }, { "epoch": 0.12740968485780169, "grad_norm": 0.6757781505584717, "learning_rate": 9.837462340421886e-05, "loss": 3.5399, "step": 1554 }, { "epoch": 0.1275736612861901, "grad_norm": 0.6116536855697632, "learning_rate": 9.836775939883852e-05, "loss": 3.5487, "step": 1556 }, { "epoch": 0.12773763771457852, "grad_norm": 0.6963580250740051, "learning_rate": 9.836088117103222e-05, "loss": 3.5252, "step": 1558 }, { "epoch": 0.12790161414296694, "grad_norm": 0.7524001002311707, "learning_rate": 9.835398872282247e-05, "loss": 3.507, "step": 1560 }, { "epoch": 0.12806559057135536, "grad_norm": 0.6589372754096985, "learning_rate": 9.834708205623599e-05, "loss": 3.5236, "step": 1562 }, { "epoch": 0.12822956699974378, "grad_norm": 0.6432667970657349, "learning_rate": 9.834016117330369e-05, "loss": 3.5065, "step": 1564 }, { "epoch": 0.1283935434281322, "grad_norm": 0.6807281970977783, "learning_rate": 9.833322607606062e-05, "loss": 3.494, "step": 1566 }, { "epoch": 0.1285575198565206, "grad_norm": 0.6932308077812195, "learning_rate": 9.832627676654601e-05, "loss": 3.5196, "step": 1568 }, { "epoch": 0.12872149628490903, "grad_norm": 0.6904752254486084, "learning_rate": 9.831931324680333e-05, "loss": 3.5883, "step": 1570 }, { "epoch": 0.12888547271329745, "grad_norm": 0.7266760468482971, "learning_rate": 9.831233551888015e-05, "loss": 3.5637, "step": 1572 }, { "epoch": 0.1290494491416859, "grad_norm": 0.6184994578361511, "learning_rate": 9.830534358482827e-05, "loss": 3.512, "step": 1574 }, { "epoch": 0.12921342557007431, "grad_norm": 0.6875196695327759, "learning_rate": 9.829833744670366e-05, "loss": 3.5822, "step": 1576 }, { "epoch": 0.12937740199846273, "grad_norm": 0.6475251317024231, "learning_rate": 9.829131710656645e-05, "loss": 3.4807, "step": 1578 }, { "epoch": 0.12954137842685115, "grad_norm": 0.6316696405410767, "learning_rate": 9.828428256648095e-05, "loss": 3.5333, "step": 1580 }, { "epoch": 0.12970535485523957, "grad_norm": 0.6235971450805664, "learning_rate": 9.827723382851565e-05, "loss": 3.4228, "step": 1582 }, { "epoch": 0.129869331283628, "grad_norm": 0.7109240293502808, "learning_rate": 9.827017089474324e-05, "loss": 3.4983, "step": 1584 }, { "epoch": 0.1300333077120164, "grad_norm": 0.7131465673446655, "learning_rate": 9.826309376724052e-05, "loss": 3.5094, "step": 1586 }, { "epoch": 0.13019728414040482, "grad_norm": 0.6761036515235901, "learning_rate": 9.825600244808853e-05, "loss": 3.5461, "step": 1588 }, { "epoch": 0.13036126056879324, "grad_norm": 0.6749283671379089, "learning_rate": 9.824889693937245e-05, "loss": 3.5214, "step": 1590 }, { "epoch": 0.13052523699718166, "grad_norm": 0.952836811542511, "learning_rate": 9.824177724318162e-05, "loss": 3.5135, "step": 1592 }, { "epoch": 0.13068921342557008, "grad_norm": 0.7100101113319397, "learning_rate": 9.823464336160959e-05, "loss": 3.5523, "step": 1594 }, { "epoch": 0.1308531898539585, "grad_norm": 0.7093891501426697, "learning_rate": 9.822749529675406e-05, "loss": 3.5338, "step": 1596 }, { "epoch": 0.13101716628234691, "grad_norm": 0.716699481010437, "learning_rate": 9.822033305071689e-05, "loss": 3.5206, "step": 1598 }, { "epoch": 0.13118114271073533, "grad_norm": 0.6527066230773926, "learning_rate": 9.821315662560415e-05, "loss": 3.5264, "step": 1600 }, { "epoch": 0.13134511913912375, "grad_norm": 0.6964712738990784, "learning_rate": 9.820596602352601e-05, "loss": 3.4957, "step": 1602 }, { "epoch": 0.13150909556751217, "grad_norm": 0.7309548854827881, "learning_rate": 9.819876124659687e-05, "loss": 3.5073, "step": 1604 }, { "epoch": 0.1316730719959006, "grad_norm": 0.7031247019767761, "learning_rate": 9.819154229693529e-05, "loss": 3.4814, "step": 1606 }, { "epoch": 0.131837048424289, "grad_norm": 0.6443150639533997, "learning_rate": 9.818430917666397e-05, "loss": 3.4822, "step": 1608 }, { "epoch": 0.13200102485267742, "grad_norm": 0.6941884160041809, "learning_rate": 9.817706188790979e-05, "loss": 3.4766, "step": 1610 }, { "epoch": 0.13216500128106584, "grad_norm": 0.6277130246162415, "learning_rate": 9.81698004328038e-05, "loss": 3.4984, "step": 1612 }, { "epoch": 0.13232897770945426, "grad_norm": 0.7393566966056824, "learning_rate": 9.816252481348122e-05, "loss": 3.4467, "step": 1614 }, { "epoch": 0.13249295413784268, "grad_norm": 0.8626520037651062, "learning_rate": 9.815523503208141e-05, "loss": 3.5066, "step": 1616 }, { "epoch": 0.1326569305662311, "grad_norm": 0.7005507946014404, "learning_rate": 9.814793109074795e-05, "loss": 3.5249, "step": 1618 }, { "epoch": 0.13282090699461951, "grad_norm": 0.695397675037384, "learning_rate": 9.814061299162853e-05, "loss": 3.5428, "step": 1620 }, { "epoch": 0.13298488342300793, "grad_norm": 0.7181118130683899, "learning_rate": 9.8133280736875e-05, "loss": 3.4943, "step": 1622 }, { "epoch": 0.13314885985139635, "grad_norm": 0.7019610404968262, "learning_rate": 9.812593432864343e-05, "loss": 3.4751, "step": 1624 }, { "epoch": 0.13331283627978477, "grad_norm": 0.629170298576355, "learning_rate": 9.811857376909398e-05, "loss": 3.4791, "step": 1626 }, { "epoch": 0.1334768127081732, "grad_norm": 0.7574000358581543, "learning_rate": 9.8111199060391e-05, "loss": 3.5068, "step": 1628 }, { "epoch": 0.13364078913656163, "grad_norm": 0.6433237195014954, "learning_rate": 9.810381020470303e-05, "loss": 3.5356, "step": 1630 }, { "epoch": 0.13380476556495005, "grad_norm": 0.8434267044067383, "learning_rate": 9.809640720420275e-05, "loss": 3.5549, "step": 1632 }, { "epoch": 0.13396874199333847, "grad_norm": 0.7503165602684021, "learning_rate": 9.808899006106697e-05, "loss": 3.4256, "step": 1634 }, { "epoch": 0.1341327184217269, "grad_norm": 0.6501772403717041, "learning_rate": 9.808155877747671e-05, "loss": 3.4676, "step": 1636 }, { "epoch": 0.1342966948501153, "grad_norm": 0.6620165109634399, "learning_rate": 9.80741133556171e-05, "loss": 3.4372, "step": 1638 }, { "epoch": 0.13446067127850372, "grad_norm": 0.7528221607208252, "learning_rate": 9.806665379767746e-05, "loss": 3.4309, "step": 1640 }, { "epoch": 0.13462464770689214, "grad_norm": 0.6489667892456055, "learning_rate": 9.805918010585124e-05, "loss": 3.4438, "step": 1642 }, { "epoch": 0.13478862413528056, "grad_norm": 0.5966881513595581, "learning_rate": 9.805169228233608e-05, "loss": 3.4465, "step": 1644 }, { "epoch": 0.13495260056366898, "grad_norm": 0.6900391578674316, "learning_rate": 9.804419032933377e-05, "loss": 3.5272, "step": 1646 }, { "epoch": 0.1351165769920574, "grad_norm": 0.7607645988464355, "learning_rate": 9.80366742490502e-05, "loss": 3.4956, "step": 1648 }, { "epoch": 0.13528055342044581, "grad_norm": 0.6069225668907166, "learning_rate": 9.802914404369548e-05, "loss": 3.4641, "step": 1650 }, { "epoch": 0.13544452984883423, "grad_norm": 0.6781127452850342, "learning_rate": 9.802159971548386e-05, "loss": 3.5116, "step": 1652 }, { "epoch": 0.13560850627722265, "grad_norm": 0.5579132437705994, "learning_rate": 9.801404126663372e-05, "loss": 3.5021, "step": 1654 }, { "epoch": 0.13577248270561107, "grad_norm": 0.8519318103790283, "learning_rate": 9.800646869936758e-05, "loss": 3.4645, "step": 1656 }, { "epoch": 0.1359364591339995, "grad_norm": 0.8295395374298096, "learning_rate": 9.799888201591219e-05, "loss": 3.4875, "step": 1658 }, { "epoch": 0.1361004355623879, "grad_norm": 0.7860473990440369, "learning_rate": 9.799128121849835e-05, "loss": 3.5143, "step": 1660 }, { "epoch": 0.13626441199077632, "grad_norm": 0.676199197769165, "learning_rate": 9.798366630936107e-05, "loss": 3.4924, "step": 1662 }, { "epoch": 0.13642838841916474, "grad_norm": 0.7471193075180054, "learning_rate": 9.797603729073949e-05, "loss": 3.4606, "step": 1664 }, { "epoch": 0.13659236484755316, "grad_norm": 0.7911469340324402, "learning_rate": 9.796839416487693e-05, "loss": 3.487, "step": 1666 }, { "epoch": 0.13675634127594158, "grad_norm": 0.7229553461074829, "learning_rate": 9.796073693402081e-05, "loss": 3.5058, "step": 1668 }, { "epoch": 0.13692031770433, "grad_norm": 0.7046807408332825, "learning_rate": 9.795306560042272e-05, "loss": 3.4739, "step": 1670 }, { "epoch": 0.13708429413271842, "grad_norm": 0.7285602688789368, "learning_rate": 9.794538016633842e-05, "loss": 3.4592, "step": 1672 }, { "epoch": 0.13724827056110683, "grad_norm": 0.7747913002967834, "learning_rate": 9.793768063402777e-05, "loss": 3.4593, "step": 1674 }, { "epoch": 0.13741224698949525, "grad_norm": 0.7013533711433411, "learning_rate": 9.792996700575481e-05, "loss": 3.5569, "step": 1676 }, { "epoch": 0.13757622341788367, "grad_norm": 0.6556512117385864, "learning_rate": 9.792223928378772e-05, "loss": 3.4342, "step": 1678 }, { "epoch": 0.1377401998462721, "grad_norm": 0.7427647709846497, "learning_rate": 9.79144974703988e-05, "loss": 3.45, "step": 1680 }, { "epoch": 0.1379041762746605, "grad_norm": 0.7319619059562683, "learning_rate": 9.790674156786452e-05, "loss": 3.5378, "step": 1682 }, { "epoch": 0.13806815270304892, "grad_norm": 0.6408218741416931, "learning_rate": 9.78989715784655e-05, "loss": 3.5022, "step": 1684 }, { "epoch": 0.13823212913143734, "grad_norm": 0.5858979821205139, "learning_rate": 9.789118750448647e-05, "loss": 3.461, "step": 1686 }, { "epoch": 0.1383961055598258, "grad_norm": 0.6623833775520325, "learning_rate": 9.788338934821632e-05, "loss": 3.5288, "step": 1688 }, { "epoch": 0.1385600819882142, "grad_norm": 0.6391535997390747, "learning_rate": 9.787557711194808e-05, "loss": 3.5074, "step": 1690 }, { "epoch": 0.13872405841660262, "grad_norm": 0.5775202512741089, "learning_rate": 9.786775079797893e-05, "loss": 3.4467, "step": 1692 }, { "epoch": 0.13888803484499104, "grad_norm": 0.6308355927467346, "learning_rate": 9.785991040861017e-05, "loss": 3.4568, "step": 1694 }, { "epoch": 0.13905201127337946, "grad_norm": 0.7259300351142883, "learning_rate": 9.785205594614725e-05, "loss": 3.5018, "step": 1696 }, { "epoch": 0.13921598770176788, "grad_norm": 0.7123456001281738, "learning_rate": 9.784418741289975e-05, "loss": 3.4491, "step": 1698 }, { "epoch": 0.1393799641301563, "grad_norm": 0.7426223754882812, "learning_rate": 9.783630481118141e-05, "loss": 3.5045, "step": 1700 }, { "epoch": 0.13954394055854472, "grad_norm": 0.8505781292915344, "learning_rate": 9.782840814331007e-05, "loss": 3.5193, "step": 1702 }, { "epoch": 0.13970791698693313, "grad_norm": 0.7968536615371704, "learning_rate": 9.782049741160775e-05, "loss": 3.499, "step": 1704 }, { "epoch": 0.13987189341532155, "grad_norm": 0.7513880729675293, "learning_rate": 9.781257261840055e-05, "loss": 3.4711, "step": 1706 }, { "epoch": 0.14003586984370997, "grad_norm": 0.8544629812240601, "learning_rate": 9.780463376601878e-05, "loss": 3.4969, "step": 1708 }, { "epoch": 0.1401998462720984, "grad_norm": 0.5832239985466003, "learning_rate": 9.77966808567968e-05, "loss": 3.3526, "step": 1710 }, { "epoch": 0.1403638227004868, "grad_norm": 0.7072123885154724, "learning_rate": 9.778871389307318e-05, "loss": 3.4907, "step": 1712 }, { "epoch": 0.14052779912887522, "grad_norm": 0.8044034242630005, "learning_rate": 9.778073287719054e-05, "loss": 3.5346, "step": 1714 }, { "epoch": 0.14069177555726364, "grad_norm": 0.6524981260299683, "learning_rate": 9.777273781149574e-05, "loss": 3.429, "step": 1716 }, { "epoch": 0.14085575198565206, "grad_norm": 0.5953693389892578, "learning_rate": 9.776472869833965e-05, "loss": 3.4326, "step": 1718 }, { "epoch": 0.14101972841404048, "grad_norm": 0.586199164390564, "learning_rate": 9.775670554007736e-05, "loss": 3.382, "step": 1720 }, { "epoch": 0.1411837048424289, "grad_norm": 0.6465304493904114, "learning_rate": 9.774866833906808e-05, "loss": 3.4143, "step": 1722 }, { "epoch": 0.14134768127081732, "grad_norm": 0.8190158009529114, "learning_rate": 9.774061709767508e-05, "loss": 3.4286, "step": 1724 }, { "epoch": 0.14151165769920573, "grad_norm": 0.778614342212677, "learning_rate": 9.773255181826586e-05, "loss": 3.4458, "step": 1726 }, { "epoch": 0.14167563412759415, "grad_norm": 0.8797032237052917, "learning_rate": 9.772447250321197e-05, "loss": 3.4719, "step": 1728 }, { "epoch": 0.14183961055598257, "grad_norm": 0.6563115119934082, "learning_rate": 9.771637915488911e-05, "loss": 3.3792, "step": 1730 }, { "epoch": 0.142003586984371, "grad_norm": 0.823006272315979, "learning_rate": 9.770827177567712e-05, "loss": 3.4733, "step": 1732 }, { "epoch": 0.1421675634127594, "grad_norm": 0.7860798239707947, "learning_rate": 9.770015036795996e-05, "loss": 3.5151, "step": 1734 }, { "epoch": 0.14233153984114782, "grad_norm": 0.8051521182060242, "learning_rate": 9.76920149341257e-05, "loss": 3.4156, "step": 1736 }, { "epoch": 0.14249551626953624, "grad_norm": 0.6009500026702881, "learning_rate": 9.768386547656655e-05, "loss": 3.436, "step": 1738 }, { "epoch": 0.14265949269792466, "grad_norm": 0.68117356300354, "learning_rate": 9.767570199767883e-05, "loss": 3.4671, "step": 1740 }, { "epoch": 0.14282346912631308, "grad_norm": 0.6417118906974792, "learning_rate": 9.766752449986301e-05, "loss": 3.4416, "step": 1742 }, { "epoch": 0.14298744555470153, "grad_norm": 0.6248669624328613, "learning_rate": 9.765933298552366e-05, "loss": 3.3769, "step": 1744 }, { "epoch": 0.14315142198308994, "grad_norm": 0.5746626853942871, "learning_rate": 9.765112745706945e-05, "loss": 3.408, "step": 1746 }, { "epoch": 0.14331539841147836, "grad_norm": 0.6235172748565674, "learning_rate": 9.764290791691324e-05, "loss": 3.4454, "step": 1748 }, { "epoch": 0.14347937483986678, "grad_norm": 0.63954758644104, "learning_rate": 9.763467436747193e-05, "loss": 3.4275, "step": 1750 }, { "epoch": 0.1436433512682552, "grad_norm": 0.6917594075202942, "learning_rate": 9.76264268111666e-05, "loss": 3.3796, "step": 1752 }, { "epoch": 0.14380732769664362, "grad_norm": 0.6280871629714966, "learning_rate": 9.76181652504224e-05, "loss": 3.4056, "step": 1754 }, { "epoch": 0.14397130412503203, "grad_norm": 0.5994766354560852, "learning_rate": 9.760988968766864e-05, "loss": 3.458, "step": 1756 }, { "epoch": 0.14413528055342045, "grad_norm": 0.7142448425292969, "learning_rate": 9.760160012533872e-05, "loss": 3.4442, "step": 1758 }, { "epoch": 0.14429925698180887, "grad_norm": 0.632342517375946, "learning_rate": 9.759329656587017e-05, "loss": 3.4396, "step": 1760 }, { "epoch": 0.1444632334101973, "grad_norm": 0.6980354189872742, "learning_rate": 9.758497901170465e-05, "loss": 3.4684, "step": 1762 }, { "epoch": 0.1446272098385857, "grad_norm": 0.6333186626434326, "learning_rate": 9.75766474652879e-05, "loss": 3.4394, "step": 1764 }, { "epoch": 0.14479118626697413, "grad_norm": 0.5935460329055786, "learning_rate": 9.756830192906978e-05, "loss": 3.3757, "step": 1766 }, { "epoch": 0.14495516269536254, "grad_norm": 0.6060703992843628, "learning_rate": 9.75599424055043e-05, "loss": 3.3985, "step": 1768 }, { "epoch": 0.14511913912375096, "grad_norm": 0.7013797760009766, "learning_rate": 9.755156889704953e-05, "loss": 3.4613, "step": 1770 }, { "epoch": 0.14528311555213938, "grad_norm": 0.6497318744659424, "learning_rate": 9.75431814061677e-05, "loss": 3.3849, "step": 1772 }, { "epoch": 0.1454470919805278, "grad_norm": 0.6609060764312744, "learning_rate": 9.753477993532514e-05, "loss": 3.3863, "step": 1774 }, { "epoch": 0.14561106840891622, "grad_norm": 0.6392355561256409, "learning_rate": 9.752636448699227e-05, "loss": 3.4147, "step": 1776 }, { "epoch": 0.14577504483730463, "grad_norm": 0.729839563369751, "learning_rate": 9.751793506364362e-05, "loss": 3.3952, "step": 1778 }, { "epoch": 0.14593902126569305, "grad_norm": 0.6394525170326233, "learning_rate": 9.750949166775786e-05, "loss": 3.4272, "step": 1780 }, { "epoch": 0.14610299769408147, "grad_norm": 0.7037297487258911, "learning_rate": 9.750103430181776e-05, "loss": 3.3667, "step": 1782 }, { "epoch": 0.1462669741224699, "grad_norm": 0.6344433426856995, "learning_rate": 9.749256296831017e-05, "loss": 3.3558, "step": 1784 }, { "epoch": 0.1464309505508583, "grad_norm": 0.6455307006835938, "learning_rate": 9.748407766972607e-05, "loss": 3.3936, "step": 1786 }, { "epoch": 0.14659492697924673, "grad_norm": 0.7534605860710144, "learning_rate": 9.747557840856055e-05, "loss": 3.3973, "step": 1788 }, { "epoch": 0.14675890340763514, "grad_norm": 0.6741543412208557, "learning_rate": 9.746706518731278e-05, "loss": 3.4123, "step": 1790 }, { "epoch": 0.14692287983602356, "grad_norm": 0.7014438509941101, "learning_rate": 9.745853800848606e-05, "loss": 3.4118, "step": 1792 }, { "epoch": 0.14708685626441198, "grad_norm": 0.6591073870658875, "learning_rate": 9.74499968745878e-05, "loss": 3.4319, "step": 1794 }, { "epoch": 0.1472508326928004, "grad_norm": 0.6363744735717773, "learning_rate": 9.74414417881295e-05, "loss": 3.384, "step": 1796 }, { "epoch": 0.14741480912118882, "grad_norm": 0.6241241097450256, "learning_rate": 9.743287275162673e-05, "loss": 3.4458, "step": 1798 }, { "epoch": 0.14757878554957723, "grad_norm": 0.7173709869384766, "learning_rate": 9.742428976759925e-05, "loss": 3.4145, "step": 1800 }, { "epoch": 0.14774276197796568, "grad_norm": 0.6002538800239563, "learning_rate": 9.741569283857082e-05, "loss": 3.3948, "step": 1802 }, { "epoch": 0.1479067384063541, "grad_norm": 0.7210296988487244, "learning_rate": 9.740708196706936e-05, "loss": 3.3912, "step": 1804 }, { "epoch": 0.14807071483474252, "grad_norm": 0.5635441541671753, "learning_rate": 9.739845715562688e-05, "loss": 3.4489, "step": 1806 }, { "epoch": 0.14823469126313094, "grad_norm": 0.6474645733833313, "learning_rate": 9.738981840677948e-05, "loss": 3.429, "step": 1808 }, { "epoch": 0.14839866769151935, "grad_norm": 0.6213793754577637, "learning_rate": 9.738116572306737e-05, "loss": 3.4043, "step": 1810 }, { "epoch": 0.14856264411990777, "grad_norm": 0.5918754935264587, "learning_rate": 9.737249910703485e-05, "loss": 3.371, "step": 1812 }, { "epoch": 0.1487266205482962, "grad_norm": 0.5595375299453735, "learning_rate": 9.736381856123034e-05, "loss": 3.3467, "step": 1814 }, { "epoch": 0.1488905969766846, "grad_norm": 0.5739578604698181, "learning_rate": 9.735512408820628e-05, "loss": 3.4073, "step": 1816 }, { "epoch": 0.14905457340507303, "grad_norm": 0.5913086533546448, "learning_rate": 9.73464156905193e-05, "loss": 3.37, "step": 1818 }, { "epoch": 0.14921854983346144, "grad_norm": 0.5342605710029602, "learning_rate": 9.733769337073009e-05, "loss": 3.3615, "step": 1820 }, { "epoch": 0.14938252626184986, "grad_norm": 0.6710630655288696, "learning_rate": 9.73289571314034e-05, "loss": 3.3903, "step": 1822 }, { "epoch": 0.14954650269023828, "grad_norm": 0.6280022263526917, "learning_rate": 9.732020697510811e-05, "loss": 3.366, "step": 1824 }, { "epoch": 0.1497104791186267, "grad_norm": 0.6352916955947876, "learning_rate": 9.731144290441718e-05, "loss": 3.4006, "step": 1826 }, { "epoch": 0.14987445554701512, "grad_norm": 0.6925874948501587, "learning_rate": 9.730266492190769e-05, "loss": 3.4513, "step": 1828 }, { "epoch": 0.15003843197540354, "grad_norm": 0.7678630948066711, "learning_rate": 9.729387303016076e-05, "loss": 3.3515, "step": 1830 }, { "epoch": 0.15020240840379195, "grad_norm": 0.7654356956481934, "learning_rate": 9.728506723176162e-05, "loss": 3.357, "step": 1832 }, { "epoch": 0.15036638483218037, "grad_norm": 0.6860572695732117, "learning_rate": 9.727624752929962e-05, "loss": 3.4024, "step": 1834 }, { "epoch": 0.1505303612605688, "grad_norm": 0.6397068500518799, "learning_rate": 9.726741392536815e-05, "loss": 3.4281, "step": 1836 }, { "epoch": 0.1506943376889572, "grad_norm": 0.6445949673652649, "learning_rate": 9.725856642256472e-05, "loss": 3.3339, "step": 1838 }, { "epoch": 0.15085831411734563, "grad_norm": 0.6427408456802368, "learning_rate": 9.724970502349091e-05, "loss": 3.3984, "step": 1840 }, { "epoch": 0.15102229054573404, "grad_norm": 0.6301809549331665, "learning_rate": 9.72408297307524e-05, "loss": 3.3884, "step": 1842 }, { "epoch": 0.15118626697412246, "grad_norm": 0.561808705329895, "learning_rate": 9.723194054695894e-05, "loss": 3.39, "step": 1844 }, { "epoch": 0.15135024340251088, "grad_norm": 0.5919866561889648, "learning_rate": 9.722303747472441e-05, "loss": 3.3677, "step": 1846 }, { "epoch": 0.1515142198308993, "grad_norm": 0.6336367726325989, "learning_rate": 9.721412051666668e-05, "loss": 3.3986, "step": 1848 }, { "epoch": 0.15167819625928772, "grad_norm": 0.687470018863678, "learning_rate": 9.720518967540781e-05, "loss": 3.3543, "step": 1850 }, { "epoch": 0.15184217268767614, "grad_norm": 0.7600200176239014, "learning_rate": 9.719624495357387e-05, "loss": 3.4157, "step": 1852 }, { "epoch": 0.15200614911606455, "grad_norm": 0.6732688546180725, "learning_rate": 9.718728635379502e-05, "loss": 3.4003, "step": 1854 }, { "epoch": 0.15217012554445297, "grad_norm": 0.7202364206314087, "learning_rate": 9.717831387870555e-05, "loss": 3.3777, "step": 1856 }, { "epoch": 0.1523341019728414, "grad_norm": 0.6364483833312988, "learning_rate": 9.716932753094376e-05, "loss": 3.3864, "step": 1858 }, { "epoch": 0.15249807840122984, "grad_norm": 0.5882256627082825, "learning_rate": 9.71603273131521e-05, "loss": 3.3319, "step": 1860 }, { "epoch": 0.15266205482961825, "grad_norm": 0.716076672077179, "learning_rate": 9.715131322797704e-05, "loss": 3.4332, "step": 1862 }, { "epoch": 0.15282603125800667, "grad_norm": 0.6526336073875427, "learning_rate": 9.714228527806915e-05, "loss": 3.3591, "step": 1864 }, { "epoch": 0.1529900076863951, "grad_norm": 0.588830292224884, "learning_rate": 9.71332434660831e-05, "loss": 3.4014, "step": 1866 }, { "epoch": 0.1531539841147835, "grad_norm": 0.5935143232345581, "learning_rate": 9.712418779467758e-05, "loss": 3.3398, "step": 1868 }, { "epoch": 0.15331796054317193, "grad_norm": 0.6331619620323181, "learning_rate": 9.71151182665154e-05, "loss": 3.3374, "step": 1870 }, { "epoch": 0.15348193697156035, "grad_norm": 0.5878372192382812, "learning_rate": 9.710603488426345e-05, "loss": 3.3286, "step": 1872 }, { "epoch": 0.15364591339994876, "grad_norm": 0.5949060320854187, "learning_rate": 9.709693765059266e-05, "loss": 3.3715, "step": 1874 }, { "epoch": 0.15380988982833718, "grad_norm": 0.665457546710968, "learning_rate": 9.708782656817807e-05, "loss": 3.355, "step": 1876 }, { "epoch": 0.1539738662567256, "grad_norm": 0.6434701681137085, "learning_rate": 9.707870163969874e-05, "loss": 3.3317, "step": 1878 }, { "epoch": 0.15413784268511402, "grad_norm": 0.6445391774177551, "learning_rate": 9.706956286783786e-05, "loss": 3.3943, "step": 1880 }, { "epoch": 0.15430181911350244, "grad_norm": 0.6335451602935791, "learning_rate": 9.706041025528266e-05, "loss": 3.3645, "step": 1882 }, { "epoch": 0.15446579554189085, "grad_norm": 0.6084844470024109, "learning_rate": 9.705124380472443e-05, "loss": 3.3914, "step": 1884 }, { "epoch": 0.15462977197027927, "grad_norm": 0.6261113286018372, "learning_rate": 9.704206351885857e-05, "loss": 3.3381, "step": 1886 }, { "epoch": 0.1547937483986677, "grad_norm": 0.6548987030982971, "learning_rate": 9.703286940038449e-05, "loss": 3.3277, "step": 1888 }, { "epoch": 0.1549577248270561, "grad_norm": 0.6208562254905701, "learning_rate": 9.702366145200573e-05, "loss": 3.3789, "step": 1890 }, { "epoch": 0.15512170125544453, "grad_norm": 0.6488550901412964, "learning_rate": 9.701443967642984e-05, "loss": 3.3861, "step": 1892 }, { "epoch": 0.15528567768383295, "grad_norm": 0.6071347594261169, "learning_rate": 9.700520407636849e-05, "loss": 3.4027, "step": 1894 }, { "epoch": 0.15544965411222136, "grad_norm": 0.6656597852706909, "learning_rate": 9.699595465453734e-05, "loss": 3.3003, "step": 1896 }, { "epoch": 0.15561363054060978, "grad_norm": 0.6349019408226013, "learning_rate": 9.69866914136562e-05, "loss": 3.398, "step": 1898 }, { "epoch": 0.1557776069689982, "grad_norm": 0.6088286638259888, "learning_rate": 9.69774143564489e-05, "loss": 3.3114, "step": 1900 }, { "epoch": 0.15594158339738662, "grad_norm": 0.8147541284561157, "learning_rate": 9.696812348564331e-05, "loss": 3.3856, "step": 1902 }, { "epoch": 0.15610555982577504, "grad_norm": 0.6513493657112122, "learning_rate": 9.695881880397143e-05, "loss": 3.4239, "step": 1904 }, { "epoch": 0.15626953625416345, "grad_norm": 0.6540910601615906, "learning_rate": 9.694950031416925e-05, "loss": 3.3575, "step": 1906 }, { "epoch": 0.15643351268255187, "grad_norm": 0.6017822027206421, "learning_rate": 9.694016801897685e-05, "loss": 3.3468, "step": 1908 }, { "epoch": 0.1565974891109403, "grad_norm": 0.7597635984420776, "learning_rate": 9.693082192113839e-05, "loss": 3.3359, "step": 1910 }, { "epoch": 0.1567614655393287, "grad_norm": 0.8275761008262634, "learning_rate": 9.692146202340206e-05, "loss": 3.3465, "step": 1912 }, { "epoch": 0.15692544196771713, "grad_norm": 0.8236324787139893, "learning_rate": 9.69120883285201e-05, "loss": 3.3518, "step": 1914 }, { "epoch": 0.15708941839610557, "grad_norm": 0.7957652807235718, "learning_rate": 9.690270083924883e-05, "loss": 3.3424, "step": 1916 }, { "epoch": 0.157253394824494, "grad_norm": 0.7953089475631714, "learning_rate": 9.689329955834865e-05, "loss": 3.353, "step": 1918 }, { "epoch": 0.1574173712528824, "grad_norm": 0.7492114305496216, "learning_rate": 9.688388448858394e-05, "loss": 3.3389, "step": 1920 }, { "epoch": 0.15758134768127083, "grad_norm": 0.612477719783783, "learning_rate": 9.68744556327232e-05, "loss": 3.4137, "step": 1922 }, { "epoch": 0.15774532410965925, "grad_norm": 0.6381865739822388, "learning_rate": 9.686501299353895e-05, "loss": 3.3281, "step": 1924 }, { "epoch": 0.15790930053804766, "grad_norm": 0.6546152234077454, "learning_rate": 9.68555565738078e-05, "loss": 3.3501, "step": 1926 }, { "epoch": 0.15807327696643608, "grad_norm": 0.6780794262886047, "learning_rate": 9.684608637631036e-05, "loss": 3.3393, "step": 1928 }, { "epoch": 0.1582372533948245, "grad_norm": 0.637367308139801, "learning_rate": 9.683660240383135e-05, "loss": 3.3353, "step": 1930 }, { "epoch": 0.15840122982321292, "grad_norm": 0.647280216217041, "learning_rate": 9.68271046591595e-05, "loss": 3.4004, "step": 1932 }, { "epoch": 0.15856520625160134, "grad_norm": 0.6508013010025024, "learning_rate": 9.681759314508758e-05, "loss": 3.3203, "step": 1934 }, { "epoch": 0.15872918267998976, "grad_norm": 0.6182466745376587, "learning_rate": 9.680806786441244e-05, "loss": 3.3234, "step": 1936 }, { "epoch": 0.15889315910837817, "grad_norm": 0.6148533821105957, "learning_rate": 9.679852881993496e-05, "loss": 3.4097, "step": 1938 }, { "epoch": 0.1590571355367666, "grad_norm": 0.7295002937316895, "learning_rate": 9.678897601446008e-05, "loss": 3.3832, "step": 1940 }, { "epoch": 0.159221111965155, "grad_norm": 0.611003041267395, "learning_rate": 9.67794094507968e-05, "loss": 3.3381, "step": 1942 }, { "epoch": 0.15938508839354343, "grad_norm": 0.6375026106834412, "learning_rate": 9.676982913175813e-05, "loss": 3.3224, "step": 1944 }, { "epoch": 0.15954906482193185, "grad_norm": 0.5718812942504883, "learning_rate": 9.676023506016112e-05, "loss": 3.3178, "step": 1946 }, { "epoch": 0.15971304125032026, "grad_norm": 0.5863606333732605, "learning_rate": 9.675062723882691e-05, "loss": 3.3181, "step": 1948 }, { "epoch": 0.15987701767870868, "grad_norm": 0.6420906782150269, "learning_rate": 9.674100567058064e-05, "loss": 3.3457, "step": 1950 }, { "epoch": 0.1600409941070971, "grad_norm": 0.6146227121353149, "learning_rate": 9.673137035825153e-05, "loss": 3.3324, "step": 1952 }, { "epoch": 0.16020497053548552, "grad_norm": 0.6017966866493225, "learning_rate": 9.672172130467281e-05, "loss": 3.3241, "step": 1954 }, { "epoch": 0.16036894696387394, "grad_norm": 0.6874404549598694, "learning_rate": 9.671205851268175e-05, "loss": 3.3436, "step": 1956 }, { "epoch": 0.16053292339226236, "grad_norm": 0.6882346868515015, "learning_rate": 9.670238198511969e-05, "loss": 3.3263, "step": 1958 }, { "epoch": 0.16069689982065077, "grad_norm": 0.7165938019752502, "learning_rate": 9.669269172483197e-05, "loss": 3.2836, "step": 1960 }, { "epoch": 0.1608608762490392, "grad_norm": 0.7419902086257935, "learning_rate": 9.668298773466802e-05, "loss": 3.3599, "step": 1962 }, { "epoch": 0.1610248526774276, "grad_norm": 0.7155027985572815, "learning_rate": 9.667327001748125e-05, "loss": 3.328, "step": 1964 }, { "epoch": 0.16118882910581603, "grad_norm": 0.5572860836982727, "learning_rate": 9.666353857612913e-05, "loss": 3.2618, "step": 1966 }, { "epoch": 0.16135280553420445, "grad_norm": 0.6426743865013123, "learning_rate": 9.665379341347318e-05, "loss": 3.3289, "step": 1968 }, { "epoch": 0.16151678196259286, "grad_norm": 0.7807396054267883, "learning_rate": 9.664403453237894e-05, "loss": 3.3503, "step": 1970 }, { "epoch": 0.16168075839098128, "grad_norm": 0.6256475448608398, "learning_rate": 9.663426193571598e-05, "loss": 3.368, "step": 1972 }, { "epoch": 0.16184473481936973, "grad_norm": 0.6786140203475952, "learning_rate": 9.662447562635791e-05, "loss": 3.3482, "step": 1974 }, { "epoch": 0.16200871124775815, "grad_norm": 0.6162734627723694, "learning_rate": 9.661467560718237e-05, "loss": 3.3226, "step": 1976 }, { "epoch": 0.16217268767614657, "grad_norm": 0.609999418258667, "learning_rate": 9.660486188107104e-05, "loss": 3.3118, "step": 1978 }, { "epoch": 0.16233666410453498, "grad_norm": 0.7471441626548767, "learning_rate": 9.659503445090963e-05, "loss": 3.3642, "step": 1980 }, { "epoch": 0.1625006405329234, "grad_norm": 0.6361717581748962, "learning_rate": 9.658519331958785e-05, "loss": 3.2547, "step": 1982 }, { "epoch": 0.16266461696131182, "grad_norm": 0.5587472915649414, "learning_rate": 9.657533848999947e-05, "loss": 3.3375, "step": 1984 }, { "epoch": 0.16282859338970024, "grad_norm": 0.6860288381576538, "learning_rate": 9.65654699650423e-05, "loss": 3.2796, "step": 1986 }, { "epoch": 0.16299256981808866, "grad_norm": 0.6986459493637085, "learning_rate": 9.655558774761813e-05, "loss": 3.3553, "step": 1988 }, { "epoch": 0.16315654624647707, "grad_norm": 0.7562621235847473, "learning_rate": 9.654569184063282e-05, "loss": 3.3779, "step": 1990 }, { "epoch": 0.1633205226748655, "grad_norm": 0.7100428938865662, "learning_rate": 9.653578224699622e-05, "loss": 3.3995, "step": 1992 }, { "epoch": 0.1634844991032539, "grad_norm": 0.776755690574646, "learning_rate": 9.652585896962223e-05, "loss": 3.3391, "step": 1994 }, { "epoch": 0.16364847553164233, "grad_norm": 0.6308813095092773, "learning_rate": 9.651592201142879e-05, "loss": 3.2949, "step": 1996 }, { "epoch": 0.16381245196003075, "grad_norm": 0.7114334106445312, "learning_rate": 9.650597137533782e-05, "loss": 3.3665, "step": 1998 }, { "epoch": 0.16397642838841917, "grad_norm": 0.6608272194862366, "learning_rate": 9.649600706427525e-05, "loss": 3.2825, "step": 2000 }, { "epoch": 0.16414040481680758, "grad_norm": 0.6543484926223755, "learning_rate": 9.648602908117112e-05, "loss": 3.3447, "step": 2002 }, { "epoch": 0.164304381245196, "grad_norm": 0.6831576228141785, "learning_rate": 9.647603742895939e-05, "loss": 3.3979, "step": 2004 }, { "epoch": 0.16446835767358442, "grad_norm": 0.717369019985199, "learning_rate": 9.646603211057809e-05, "loss": 3.3508, "step": 2006 }, { "epoch": 0.16463233410197284, "grad_norm": 0.6552402973175049, "learning_rate": 9.645601312896929e-05, "loss": 3.326, "step": 2008 }, { "epoch": 0.16479631053036126, "grad_norm": 0.7372413277626038, "learning_rate": 9.644598048707901e-05, "loss": 3.3048, "step": 2010 }, { "epoch": 0.16496028695874967, "grad_norm": 0.5617173910140991, "learning_rate": 9.643593418785734e-05, "loss": 3.3341, "step": 2012 }, { "epoch": 0.1651242633871381, "grad_norm": 0.6190782785415649, "learning_rate": 9.642587423425839e-05, "loss": 3.3441, "step": 2014 }, { "epoch": 0.1652882398155265, "grad_norm": 0.6181708574295044, "learning_rate": 9.641580062924022e-05, "loss": 3.3729, "step": 2016 }, { "epoch": 0.16545221624391493, "grad_norm": 0.5956866145133972, "learning_rate": 9.640571337576499e-05, "loss": 3.3385, "step": 2018 }, { "epoch": 0.16561619267230335, "grad_norm": 0.7407371401786804, "learning_rate": 9.639561247679883e-05, "loss": 3.2941, "step": 2020 }, { "epoch": 0.16578016910069177, "grad_norm": 0.6292521953582764, "learning_rate": 9.638549793531186e-05, "loss": 3.3027, "step": 2022 }, { "epoch": 0.16594414552908018, "grad_norm": 0.6599383354187012, "learning_rate": 9.637536975427826e-05, "loss": 3.2438, "step": 2024 }, { "epoch": 0.1661081219574686, "grad_norm": 0.6305571794509888, "learning_rate": 9.636522793667617e-05, "loss": 3.3542, "step": 2026 }, { "epoch": 0.16627209838585702, "grad_norm": 0.6023452877998352, "learning_rate": 9.635507248548781e-05, "loss": 3.2435, "step": 2028 }, { "epoch": 0.16643607481424547, "grad_norm": 0.60383540391922, "learning_rate": 9.634490340369933e-05, "loss": 3.3507, "step": 2030 }, { "epoch": 0.16660005124263388, "grad_norm": 0.6280787587165833, "learning_rate": 9.633472069430094e-05, "loss": 3.343, "step": 2032 }, { "epoch": 0.1667640276710223, "grad_norm": 0.6468386650085449, "learning_rate": 9.632452436028685e-05, "loss": 3.2778, "step": 2034 }, { "epoch": 0.16692800409941072, "grad_norm": 0.6163133978843689, "learning_rate": 9.631431440465526e-05, "loss": 3.2935, "step": 2036 }, { "epoch": 0.16709198052779914, "grad_norm": 0.6122549772262573, "learning_rate": 9.630409083040837e-05, "loss": 3.3503, "step": 2038 }, { "epoch": 0.16725595695618756, "grad_norm": 0.7354429364204407, "learning_rate": 9.629385364055242e-05, "loss": 3.3263, "step": 2040 }, { "epoch": 0.16741993338457598, "grad_norm": 0.5872082710266113, "learning_rate": 9.628360283809761e-05, "loss": 3.2704, "step": 2042 }, { "epoch": 0.1675839098129644, "grad_norm": 0.6023679375648499, "learning_rate": 9.627333842605819e-05, "loss": 3.2958, "step": 2044 }, { "epoch": 0.1677478862413528, "grad_norm": 0.5730355978012085, "learning_rate": 9.626306040745237e-05, "loss": 3.3174, "step": 2046 }, { "epoch": 0.16791186266974123, "grad_norm": 0.5722047090530396, "learning_rate": 9.625276878530237e-05, "loss": 3.3153, "step": 2048 }, { "epoch": 0.16807583909812965, "grad_norm": 0.6651049852371216, "learning_rate": 9.624246356263444e-05, "loss": 3.3299, "step": 2050 }, { "epoch": 0.16823981552651807, "grad_norm": 0.6372424364089966, "learning_rate": 9.623214474247878e-05, "loss": 3.3651, "step": 2052 }, { "epoch": 0.16840379195490648, "grad_norm": 0.5711623430252075, "learning_rate": 9.622181232786963e-05, "loss": 3.3216, "step": 2054 }, { "epoch": 0.1685677683832949, "grad_norm": 0.6460525393486023, "learning_rate": 9.621146632184521e-05, "loss": 3.3674, "step": 2056 }, { "epoch": 0.16873174481168332, "grad_norm": 0.6044295430183411, "learning_rate": 9.620110672744776e-05, "loss": 3.3083, "step": 2058 }, { "epoch": 0.16889572124007174, "grad_norm": 0.5659945011138916, "learning_rate": 9.619073354772344e-05, "loss": 3.3649, "step": 2060 }, { "epoch": 0.16905969766846016, "grad_norm": 0.5560106039047241, "learning_rate": 9.618034678572252e-05, "loss": 3.2848, "step": 2062 }, { "epoch": 0.16922367409684858, "grad_norm": 0.5644478797912598, "learning_rate": 9.616994644449915e-05, "loss": 3.3233, "step": 2064 }, { "epoch": 0.169387650525237, "grad_norm": 0.6400248408317566, "learning_rate": 9.615953252711157e-05, "loss": 3.3204, "step": 2066 }, { "epoch": 0.1695516269536254, "grad_norm": 0.5804336667060852, "learning_rate": 9.614910503662196e-05, "loss": 3.3332, "step": 2068 }, { "epoch": 0.16971560338201383, "grad_norm": 0.6843202710151672, "learning_rate": 9.613866397609646e-05, "loss": 3.3108, "step": 2070 }, { "epoch": 0.16987957981040225, "grad_norm": 0.6259203553199768, "learning_rate": 9.612820934860529e-05, "loss": 3.2955, "step": 2072 }, { "epoch": 0.17004355623879067, "grad_norm": 0.7539075016975403, "learning_rate": 9.611774115722258e-05, "loss": 3.3332, "step": 2074 }, { "epoch": 0.17020753266717908, "grad_norm": 0.6109238266944885, "learning_rate": 9.610725940502648e-05, "loss": 3.2937, "step": 2076 }, { "epoch": 0.1703715090955675, "grad_norm": 0.6348362565040588, "learning_rate": 9.609676409509912e-05, "loss": 3.2358, "step": 2078 }, { "epoch": 0.17053548552395592, "grad_norm": 0.7391447424888611, "learning_rate": 9.608625523052663e-05, "loss": 3.3637, "step": 2080 }, { "epoch": 0.17069946195234434, "grad_norm": 0.6116240620613098, "learning_rate": 9.607573281439913e-05, "loss": 3.299, "step": 2082 }, { "epoch": 0.17086343838073276, "grad_norm": 0.6683641672134399, "learning_rate": 9.60651968498107e-05, "loss": 3.3175, "step": 2084 }, { "epoch": 0.17102741480912118, "grad_norm": 0.5680612921714783, "learning_rate": 9.605464733985941e-05, "loss": 3.2804, "step": 2086 }, { "epoch": 0.17119139123750962, "grad_norm": 0.5645765066146851, "learning_rate": 9.60440842876473e-05, "loss": 3.307, "step": 2088 }, { "epoch": 0.17135536766589804, "grad_norm": 0.6338248252868652, "learning_rate": 9.603350769628045e-05, "loss": 3.2567, "step": 2090 }, { "epoch": 0.17151934409428646, "grad_norm": 0.6122543811798096, "learning_rate": 9.602291756886888e-05, "loss": 3.3027, "step": 2092 }, { "epoch": 0.17168332052267488, "grad_norm": 0.6619787216186523, "learning_rate": 9.601231390852656e-05, "loss": 3.2741, "step": 2094 }, { "epoch": 0.1718472969510633, "grad_norm": 0.5675482153892517, "learning_rate": 9.600169671837149e-05, "loss": 3.2811, "step": 2096 }, { "epoch": 0.1720112733794517, "grad_norm": 0.6895171999931335, "learning_rate": 9.599106600152563e-05, "loss": 3.3162, "step": 2098 }, { "epoch": 0.17217524980784013, "grad_norm": 0.7057105898857117, "learning_rate": 9.59804217611149e-05, "loss": 3.2611, "step": 2100 }, { "epoch": 0.17233922623622855, "grad_norm": 0.5840970277786255, "learning_rate": 9.596976400026925e-05, "loss": 3.2843, "step": 2102 }, { "epoch": 0.17250320266461697, "grad_norm": 0.6528168320655823, "learning_rate": 9.595909272212254e-05, "loss": 3.3212, "step": 2104 }, { "epoch": 0.17266717909300539, "grad_norm": 0.5484073162078857, "learning_rate": 9.594840792981265e-05, "loss": 3.2849, "step": 2106 }, { "epoch": 0.1728311555213938, "grad_norm": 0.5874817967414856, "learning_rate": 9.59377096264814e-05, "loss": 3.3081, "step": 2108 }, { "epoch": 0.17299513194978222, "grad_norm": 0.5669682025909424, "learning_rate": 9.592699781527461e-05, "loss": 3.3498, "step": 2110 }, { "epoch": 0.17315910837817064, "grad_norm": 0.6883938312530518, "learning_rate": 9.591627249934207e-05, "loss": 3.2798, "step": 2112 }, { "epoch": 0.17332308480655906, "grad_norm": 0.6597236394882202, "learning_rate": 9.590553368183753e-05, "loss": 3.2804, "step": 2114 }, { "epoch": 0.17348706123494748, "grad_norm": 0.5905894637107849, "learning_rate": 9.589478136591872e-05, "loss": 3.2825, "step": 2116 }, { "epoch": 0.1736510376633359, "grad_norm": 0.637214183807373, "learning_rate": 9.588401555474732e-05, "loss": 3.3519, "step": 2118 }, { "epoch": 0.1738150140917243, "grad_norm": 0.5943377017974854, "learning_rate": 9.587323625148899e-05, "loss": 3.2875, "step": 2120 }, { "epoch": 0.17397899052011273, "grad_norm": 0.49435749650001526, "learning_rate": 9.586244345931336e-05, "loss": 3.3332, "step": 2122 }, { "epoch": 0.17414296694850115, "grad_norm": 0.6532015800476074, "learning_rate": 9.585163718139405e-05, "loss": 3.3226, "step": 2124 }, { "epoch": 0.17430694337688957, "grad_norm": 0.6478725075721741, "learning_rate": 9.584081742090861e-05, "loss": 3.2902, "step": 2126 }, { "epoch": 0.17447091980527799, "grad_norm": 0.6059595942497253, "learning_rate": 9.582998418103854e-05, "loss": 3.3047, "step": 2128 }, { "epoch": 0.1746348962336664, "grad_norm": 0.6445087790489197, "learning_rate": 9.581913746496934e-05, "loss": 3.3007, "step": 2130 }, { "epoch": 0.17479887266205482, "grad_norm": 0.5795426964759827, "learning_rate": 9.580827727589048e-05, "loss": 3.3193, "step": 2132 }, { "epoch": 0.17496284909044324, "grad_norm": 0.6961095929145813, "learning_rate": 9.579740361699535e-05, "loss": 3.328, "step": 2134 }, { "epoch": 0.17512682551883166, "grad_norm": 0.7018781900405884, "learning_rate": 9.578651649148133e-05, "loss": 3.2589, "step": 2136 }, { "epoch": 0.17529080194722008, "grad_norm": 0.6638593077659607, "learning_rate": 9.577561590254977e-05, "loss": 3.3047, "step": 2138 }, { "epoch": 0.1754547783756085, "grad_norm": 0.8037712574005127, "learning_rate": 9.576470185340596e-05, "loss": 3.3166, "step": 2140 }, { "epoch": 0.1756187548039969, "grad_norm": 0.630042314529419, "learning_rate": 9.575377434725911e-05, "loss": 3.279, "step": 2142 }, { "epoch": 0.17578273123238533, "grad_norm": 0.5768330097198486, "learning_rate": 9.57428333873225e-05, "loss": 3.2599, "step": 2144 }, { "epoch": 0.17594670766077378, "grad_norm": 0.5603185296058655, "learning_rate": 9.573187897681322e-05, "loss": 3.2339, "step": 2146 }, { "epoch": 0.1761106840891622, "grad_norm": 0.7578685879707336, "learning_rate": 9.572091111895243e-05, "loss": 3.3329, "step": 2148 }, { "epoch": 0.1762746605175506, "grad_norm": 0.5876714587211609, "learning_rate": 9.57099298169652e-05, "loss": 3.2627, "step": 2150 }, { "epoch": 0.17643863694593903, "grad_norm": 0.6411724090576172, "learning_rate": 9.569893507408055e-05, "loss": 3.2554, "step": 2152 }, { "epoch": 0.17660261337432745, "grad_norm": 0.6768316626548767, "learning_rate": 9.568792689353143e-05, "loss": 3.2805, "step": 2154 }, { "epoch": 0.17676658980271587, "grad_norm": 0.7020912766456604, "learning_rate": 9.567690527855483e-05, "loss": 3.2498, "step": 2156 }, { "epoch": 0.17693056623110429, "grad_norm": 0.630198061466217, "learning_rate": 9.566587023239157e-05, "loss": 3.3052, "step": 2158 }, { "epoch": 0.1770945426594927, "grad_norm": 0.7058777213096619, "learning_rate": 9.565482175828653e-05, "loss": 3.2939, "step": 2160 }, { "epoch": 0.17725851908788112, "grad_norm": 0.684704601764679, "learning_rate": 9.564375985948846e-05, "loss": 3.2628, "step": 2162 }, { "epoch": 0.17742249551626954, "grad_norm": 0.6196277737617493, "learning_rate": 9.56326845392501e-05, "loss": 3.2804, "step": 2164 }, { "epoch": 0.17758647194465796, "grad_norm": 0.6597474217414856, "learning_rate": 9.562159580082808e-05, "loss": 3.357, "step": 2166 }, { "epoch": 0.17775044837304638, "grad_norm": 0.6917456388473511, "learning_rate": 9.561049364748307e-05, "loss": 3.2818, "step": 2168 }, { "epoch": 0.1779144248014348, "grad_norm": 0.6305201649665833, "learning_rate": 9.559937808247961e-05, "loss": 3.2252, "step": 2170 }, { "epoch": 0.1780784012298232, "grad_norm": 0.6192725896835327, "learning_rate": 9.55882491090862e-05, "loss": 3.3079, "step": 2172 }, { "epoch": 0.17824237765821163, "grad_norm": 0.6527867913246155, "learning_rate": 9.55771067305753e-05, "loss": 3.2065, "step": 2174 }, { "epoch": 0.17840635408660005, "grad_norm": 0.6737766265869141, "learning_rate": 9.556595095022331e-05, "loss": 3.2789, "step": 2176 }, { "epoch": 0.17857033051498847, "grad_norm": 0.5953449010848999, "learning_rate": 9.555478177131052e-05, "loss": 3.2632, "step": 2178 }, { "epoch": 0.17873430694337689, "grad_norm": 0.6226819157600403, "learning_rate": 9.554359919712124e-05, "loss": 3.2756, "step": 2180 }, { "epoch": 0.1788982833717653, "grad_norm": 0.6152170896530151, "learning_rate": 9.553240323094368e-05, "loss": 3.2677, "step": 2182 }, { "epoch": 0.17906225980015372, "grad_norm": 0.5997733473777771, "learning_rate": 9.552119387606997e-05, "loss": 3.2369, "step": 2184 }, { "epoch": 0.17922623622854214, "grad_norm": 0.5696983933448792, "learning_rate": 9.550997113579618e-05, "loss": 3.3002, "step": 2186 }, { "epoch": 0.17939021265693056, "grad_norm": 0.5793240666389465, "learning_rate": 9.549873501342237e-05, "loss": 3.224, "step": 2188 }, { "epoch": 0.17955418908531898, "grad_norm": 0.5453920364379883, "learning_rate": 9.548748551225246e-05, "loss": 3.3058, "step": 2190 }, { "epoch": 0.1797181655137074, "grad_norm": 0.558608889579773, "learning_rate": 9.547622263559437e-05, "loss": 3.3077, "step": 2192 }, { "epoch": 0.1798821419420958, "grad_norm": 0.577103853225708, "learning_rate": 9.546494638675989e-05, "loss": 3.2894, "step": 2194 }, { "epoch": 0.18004611837048423, "grad_norm": 0.5528544187545776, "learning_rate": 9.54536567690648e-05, "loss": 3.236, "step": 2196 }, { "epoch": 0.18021009479887265, "grad_norm": 0.5541223883628845, "learning_rate": 9.544235378582877e-05, "loss": 3.268, "step": 2198 }, { "epoch": 0.18037407122726107, "grad_norm": 0.5316542983055115, "learning_rate": 9.543103744037544e-05, "loss": 3.2532, "step": 2200 }, { "epoch": 0.18053804765564951, "grad_norm": 0.6354775428771973, "learning_rate": 9.541970773603233e-05, "loss": 3.2782, "step": 2202 }, { "epoch": 0.18070202408403793, "grad_norm": 0.5799155235290527, "learning_rate": 9.540836467613092e-05, "loss": 3.224, "step": 2204 }, { "epoch": 0.18086600051242635, "grad_norm": 0.6716517210006714, "learning_rate": 9.53970082640066e-05, "loss": 3.2579, "step": 2206 }, { "epoch": 0.18102997694081477, "grad_norm": 0.6734136343002319, "learning_rate": 9.53856385029987e-05, "loss": 3.2698, "step": 2208 }, { "epoch": 0.1811939533692032, "grad_norm": 0.7301097512245178, "learning_rate": 9.53742553964505e-05, "loss": 3.2738, "step": 2210 }, { "epoch": 0.1813579297975916, "grad_norm": 0.6413136720657349, "learning_rate": 9.536285894770914e-05, "loss": 3.2058, "step": 2212 }, { "epoch": 0.18152190622598002, "grad_norm": 0.6198046803474426, "learning_rate": 9.535144916012575e-05, "loss": 3.2869, "step": 2214 }, { "epoch": 0.18168588265436844, "grad_norm": 0.6237669587135315, "learning_rate": 9.534002603705532e-05, "loss": 3.223, "step": 2216 }, { "epoch": 0.18184985908275686, "grad_norm": 0.626846969127655, "learning_rate": 9.53285895818568e-05, "loss": 3.1932, "step": 2218 }, { "epoch": 0.18201383551114528, "grad_norm": 0.5577837824821472, "learning_rate": 9.531713979789308e-05, "loss": 3.2983, "step": 2220 }, { "epoch": 0.1821778119395337, "grad_norm": 0.5938622951507568, "learning_rate": 9.53056766885309e-05, "loss": 3.2237, "step": 2222 }, { "epoch": 0.18234178836792211, "grad_norm": 0.5766521096229553, "learning_rate": 9.529420025714099e-05, "loss": 3.3077, "step": 2224 }, { "epoch": 0.18250576479631053, "grad_norm": 0.5775324702262878, "learning_rate": 9.528271050709797e-05, "loss": 3.298, "step": 2226 }, { "epoch": 0.18266974122469895, "grad_norm": 0.575670599937439, "learning_rate": 9.527120744178034e-05, "loss": 3.2163, "step": 2228 }, { "epoch": 0.18283371765308737, "grad_norm": 0.5995689630508423, "learning_rate": 9.525969106457059e-05, "loss": 3.2937, "step": 2230 }, { "epoch": 0.1829976940814758, "grad_norm": 0.6333314180374146, "learning_rate": 9.524816137885506e-05, "loss": 3.2528, "step": 2232 }, { "epoch": 0.1831616705098642, "grad_norm": 0.6713608503341675, "learning_rate": 9.523661838802404e-05, "loss": 3.2981, "step": 2234 }, { "epoch": 0.18332564693825262, "grad_norm": 0.5735260844230652, "learning_rate": 9.52250620954717e-05, "loss": 3.2495, "step": 2236 }, { "epoch": 0.18348962336664104, "grad_norm": 0.5098928809165955, "learning_rate": 9.521349250459617e-05, "loss": 3.2896, "step": 2238 }, { "epoch": 0.18365359979502946, "grad_norm": 0.5835528373718262, "learning_rate": 9.520190961879942e-05, "loss": 3.3013, "step": 2240 }, { "epoch": 0.18381757622341788, "grad_norm": 0.5812976360321045, "learning_rate": 9.519031344148741e-05, "loss": 3.2155, "step": 2242 }, { "epoch": 0.1839815526518063, "grad_norm": 0.6407437324523926, "learning_rate": 9.517870397606996e-05, "loss": 3.3021, "step": 2244 }, { "epoch": 0.18414552908019471, "grad_norm": 0.6286873817443848, "learning_rate": 9.516708122596079e-05, "loss": 3.2593, "step": 2246 }, { "epoch": 0.18430950550858313, "grad_norm": 0.6462428569793701, "learning_rate": 9.515544519457755e-05, "loss": 3.2773, "step": 2248 }, { "epoch": 0.18447348193697155, "grad_norm": 0.5487723350524902, "learning_rate": 9.51437958853418e-05, "loss": 3.2368, "step": 2250 }, { "epoch": 0.18463745836535997, "grad_norm": 0.6235312819480896, "learning_rate": 9.513213330167898e-05, "loss": 3.2401, "step": 2252 }, { "epoch": 0.1848014347937484, "grad_norm": 0.6422250866889954, "learning_rate": 9.512045744701843e-05, "loss": 3.252, "step": 2254 }, { "epoch": 0.1849654112221368, "grad_norm": 0.6642846465110779, "learning_rate": 9.510876832479343e-05, "loss": 3.2247, "step": 2256 }, { "epoch": 0.18512938765052522, "grad_norm": 0.6005398631095886, "learning_rate": 9.509706593844114e-05, "loss": 3.2238, "step": 2258 }, { "epoch": 0.18529336407891367, "grad_norm": 0.5651940107345581, "learning_rate": 9.508535029140262e-05, "loss": 3.2212, "step": 2260 }, { "epoch": 0.1854573405073021, "grad_norm": 0.6252986788749695, "learning_rate": 9.507362138712282e-05, "loss": 3.2603, "step": 2262 }, { "epoch": 0.1856213169356905, "grad_norm": 0.6485080718994141, "learning_rate": 9.506187922905057e-05, "loss": 3.2465, "step": 2264 }, { "epoch": 0.18578529336407892, "grad_norm": 0.5912032723426819, "learning_rate": 9.505012382063869e-05, "loss": 3.1827, "step": 2266 }, { "epoch": 0.18594926979246734, "grad_norm": 0.6242038607597351, "learning_rate": 9.503835516534376e-05, "loss": 3.2717, "step": 2268 }, { "epoch": 0.18611324622085576, "grad_norm": 0.6092646718025208, "learning_rate": 9.502657326662637e-05, "loss": 3.2605, "step": 2270 }, { "epoch": 0.18627722264924418, "grad_norm": 0.5553577542304993, "learning_rate": 9.501477812795094e-05, "loss": 3.2083, "step": 2272 }, { "epoch": 0.1864411990776326, "grad_norm": 0.5823682546615601, "learning_rate": 9.500296975278581e-05, "loss": 3.2408, "step": 2274 }, { "epoch": 0.18660517550602101, "grad_norm": 0.6024842858314514, "learning_rate": 9.499114814460323e-05, "loss": 3.2654, "step": 2276 }, { "epoch": 0.18676915193440943, "grad_norm": 0.6093845367431641, "learning_rate": 9.497931330687926e-05, "loss": 3.2261, "step": 2278 }, { "epoch": 0.18693312836279785, "grad_norm": 0.5323441624641418, "learning_rate": 9.496746524309396e-05, "loss": 3.286, "step": 2280 }, { "epoch": 0.18709710479118627, "grad_norm": 0.6634844541549683, "learning_rate": 9.495560395673119e-05, "loss": 3.2319, "step": 2282 }, { "epoch": 0.1872610812195747, "grad_norm": 0.5159028172492981, "learning_rate": 9.494372945127873e-05, "loss": 3.2609, "step": 2284 }, { "epoch": 0.1874250576479631, "grad_norm": 0.6331459283828735, "learning_rate": 9.493184173022829e-05, "loss": 3.2177, "step": 2286 }, { "epoch": 0.18758903407635152, "grad_norm": 0.7428011894226074, "learning_rate": 9.49199407970754e-05, "loss": 3.2783, "step": 2288 }, { "epoch": 0.18775301050473994, "grad_norm": 0.5949831604957581, "learning_rate": 9.49080266553195e-05, "loss": 3.2425, "step": 2290 }, { "epoch": 0.18791698693312836, "grad_norm": 0.7588968873023987, "learning_rate": 9.489609930846391e-05, "loss": 3.2672, "step": 2292 }, { "epoch": 0.18808096336151678, "grad_norm": 0.592046320438385, "learning_rate": 9.488415876001586e-05, "loss": 3.2341, "step": 2294 }, { "epoch": 0.1882449397899052, "grad_norm": 0.5699316263198853, "learning_rate": 9.487220501348642e-05, "loss": 3.2835, "step": 2296 }, { "epoch": 0.18840891621829362, "grad_norm": 0.634863555431366, "learning_rate": 9.486023807239057e-05, "loss": 3.2315, "step": 2298 }, { "epoch": 0.18857289264668203, "grad_norm": 0.5566238760948181, "learning_rate": 9.484825794024716e-05, "loss": 3.1947, "step": 2300 }, { "epoch": 0.18873686907507045, "grad_norm": 0.6761863231658936, "learning_rate": 9.483626462057893e-05, "loss": 3.2531, "step": 2302 }, { "epoch": 0.18890084550345887, "grad_norm": 0.561553955078125, "learning_rate": 9.482425811691247e-05, "loss": 3.1987, "step": 2304 }, { "epoch": 0.1890648219318473, "grad_norm": 0.7657498121261597, "learning_rate": 9.481223843277827e-05, "loss": 3.2818, "step": 2306 }, { "epoch": 0.1892287983602357, "grad_norm": 0.5893799066543579, "learning_rate": 9.480020557171068e-05, "loss": 3.2392, "step": 2308 }, { "epoch": 0.18939277478862412, "grad_norm": 0.6204321980476379, "learning_rate": 9.478815953724796e-05, "loss": 3.1719, "step": 2310 }, { "epoch": 0.18955675121701254, "grad_norm": 0.5662544965744019, "learning_rate": 9.47761003329322e-05, "loss": 3.1849, "step": 2312 }, { "epoch": 0.18972072764540096, "grad_norm": 0.5701155662536621, "learning_rate": 9.476402796230938e-05, "loss": 3.2727, "step": 2314 }, { "epoch": 0.1898847040737894, "grad_norm": 0.5828278064727783, "learning_rate": 9.475194242892936e-05, "loss": 3.2154, "step": 2316 }, { "epoch": 0.19004868050217782, "grad_norm": 0.6154817342758179, "learning_rate": 9.473984373634586e-05, "loss": 3.2404, "step": 2318 }, { "epoch": 0.19021265693056624, "grad_norm": 0.5927799940109253, "learning_rate": 9.472773188811647e-05, "loss": 3.2507, "step": 2320 }, { "epoch": 0.19037663335895466, "grad_norm": 0.5183115601539612, "learning_rate": 9.471560688780266e-05, "loss": 3.2129, "step": 2322 }, { "epoch": 0.19054060978734308, "grad_norm": 0.5379958748817444, "learning_rate": 9.470346873896974e-05, "loss": 3.2855, "step": 2324 }, { "epoch": 0.1907045862157315, "grad_norm": 0.5806155204772949, "learning_rate": 9.46913174451869e-05, "loss": 3.2789, "step": 2326 }, { "epoch": 0.19086856264411992, "grad_norm": 0.5110951066017151, "learning_rate": 9.467915301002723e-05, "loss": 3.1858, "step": 2328 }, { "epoch": 0.19103253907250833, "grad_norm": 0.6017136573791504, "learning_rate": 9.466697543706764e-05, "loss": 3.214, "step": 2330 }, { "epoch": 0.19119651550089675, "grad_norm": 0.620817244052887, "learning_rate": 9.46547847298889e-05, "loss": 3.2424, "step": 2332 }, { "epoch": 0.19136049192928517, "grad_norm": 0.5650565028190613, "learning_rate": 9.464258089207569e-05, "loss": 3.1888, "step": 2334 }, { "epoch": 0.1915244683576736, "grad_norm": 0.690185546875, "learning_rate": 9.46303639272165e-05, "loss": 3.2409, "step": 2336 }, { "epoch": 0.191688444786062, "grad_norm": 0.5948742032051086, "learning_rate": 9.461813383890367e-05, "loss": 3.2372, "step": 2338 }, { "epoch": 0.19185242121445042, "grad_norm": 0.6050938367843628, "learning_rate": 9.460589063073349e-05, "loss": 3.1971, "step": 2340 }, { "epoch": 0.19201639764283884, "grad_norm": 0.6243955492973328, "learning_rate": 9.459363430630601e-05, "loss": 3.2438, "step": 2342 }, { "epoch": 0.19218037407122726, "grad_norm": 0.6408066153526306, "learning_rate": 9.458136486922519e-05, "loss": 3.2632, "step": 2344 }, { "epoch": 0.19234435049961568, "grad_norm": 0.6085670590400696, "learning_rate": 9.45690823230988e-05, "loss": 3.2042, "step": 2346 }, { "epoch": 0.1925083269280041, "grad_norm": 0.6089074015617371, "learning_rate": 9.455678667153853e-05, "loss": 3.1707, "step": 2348 }, { "epoch": 0.19267230335639252, "grad_norm": 0.572694718837738, "learning_rate": 9.454447791815986e-05, "loss": 3.1984, "step": 2350 }, { "epoch": 0.19283627978478093, "grad_norm": 0.5600984692573547, "learning_rate": 9.453215606658217e-05, "loss": 3.2981, "step": 2352 }, { "epoch": 0.19300025621316935, "grad_norm": 0.6271937489509583, "learning_rate": 9.451982112042866e-05, "loss": 3.2026, "step": 2354 }, { "epoch": 0.19316423264155777, "grad_norm": 0.5863776803016663, "learning_rate": 9.450747308332639e-05, "loss": 3.1766, "step": 2356 }, { "epoch": 0.1933282090699462, "grad_norm": 0.545541524887085, "learning_rate": 9.449511195890628e-05, "loss": 3.2055, "step": 2358 }, { "epoch": 0.1934921854983346, "grad_norm": 0.5626051425933838, "learning_rate": 9.44827377508031e-05, "loss": 3.2633, "step": 2360 }, { "epoch": 0.19365616192672302, "grad_norm": 0.5464023351669312, "learning_rate": 9.44703504626554e-05, "loss": 3.2158, "step": 2362 }, { "epoch": 0.19382013835511144, "grad_norm": 0.57725989818573, "learning_rate": 9.445795009810572e-05, "loss": 3.2539, "step": 2364 }, { "epoch": 0.19398411478349986, "grad_norm": 0.5377549529075623, "learning_rate": 9.444553666080029e-05, "loss": 3.2038, "step": 2366 }, { "epoch": 0.19414809121188828, "grad_norm": 0.5789408087730408, "learning_rate": 9.443311015438927e-05, "loss": 3.2505, "step": 2368 }, { "epoch": 0.1943120676402767, "grad_norm": 0.5615429878234863, "learning_rate": 9.442067058252666e-05, "loss": 3.2348, "step": 2370 }, { "epoch": 0.19447604406866512, "grad_norm": 0.5808910131454468, "learning_rate": 9.440821794887028e-05, "loss": 3.2645, "step": 2372 }, { "epoch": 0.19464002049705356, "grad_norm": 0.5342815518379211, "learning_rate": 9.439575225708179e-05, "loss": 3.1556, "step": 2374 }, { "epoch": 0.19480399692544198, "grad_norm": 0.6104359030723572, "learning_rate": 9.438327351082669e-05, "loss": 3.1896, "step": 2376 }, { "epoch": 0.1949679733538304, "grad_norm": 0.47551843523979187, "learning_rate": 9.437078171377437e-05, "loss": 3.236, "step": 2378 }, { "epoch": 0.19513194978221882, "grad_norm": 0.5371410846710205, "learning_rate": 9.435827686959795e-05, "loss": 3.2451, "step": 2380 }, { "epoch": 0.19529592621060723, "grad_norm": 0.5497537851333618, "learning_rate": 9.43457589819745e-05, "loss": 3.1947, "step": 2382 }, { "epoch": 0.19545990263899565, "grad_norm": 0.6186292767524719, "learning_rate": 9.433322805458484e-05, "loss": 3.242, "step": 2384 }, { "epoch": 0.19562387906738407, "grad_norm": 0.6111587882041931, "learning_rate": 9.43206840911137e-05, "loss": 3.2098, "step": 2386 }, { "epoch": 0.1957878554957725, "grad_norm": 0.636885941028595, "learning_rate": 9.430812709524956e-05, "loss": 3.2636, "step": 2388 }, { "epoch": 0.1959518319241609, "grad_norm": 0.5833829045295715, "learning_rate": 9.42955570706848e-05, "loss": 3.147, "step": 2390 }, { "epoch": 0.19611580835254933, "grad_norm": 0.645831823348999, "learning_rate": 9.42829740211156e-05, "loss": 3.2301, "step": 2392 }, { "epoch": 0.19627978478093774, "grad_norm": 0.5893756151199341, "learning_rate": 9.427037795024199e-05, "loss": 3.2677, "step": 2394 }, { "epoch": 0.19644376120932616, "grad_norm": 0.5982114672660828, "learning_rate": 9.425776886176778e-05, "loss": 3.1879, "step": 2396 }, { "epoch": 0.19660773763771458, "grad_norm": 0.6196883916854858, "learning_rate": 9.424514675940068e-05, "loss": 3.1708, "step": 2398 }, { "epoch": 0.196771714066103, "grad_norm": 0.5512893199920654, "learning_rate": 9.423251164685217e-05, "loss": 3.1997, "step": 2400 }, { "epoch": 0.19693569049449142, "grad_norm": 0.6537207961082458, "learning_rate": 9.421986352783759e-05, "loss": 3.2384, "step": 2402 }, { "epoch": 0.19709966692287983, "grad_norm": 0.572920024394989, "learning_rate": 9.420720240607606e-05, "loss": 3.1938, "step": 2404 }, { "epoch": 0.19726364335126825, "grad_norm": 0.5719939470291138, "learning_rate": 9.419452828529058e-05, "loss": 3.2079, "step": 2406 }, { "epoch": 0.19742761977965667, "grad_norm": 0.5642483234405518, "learning_rate": 9.418184116920794e-05, "loss": 3.2037, "step": 2408 }, { "epoch": 0.1975915962080451, "grad_norm": 0.4986971914768219, "learning_rate": 9.416914106155875e-05, "loss": 3.1913, "step": 2410 }, { "epoch": 0.1977555726364335, "grad_norm": 0.5548354983329773, "learning_rate": 9.415642796607746e-05, "loss": 3.2272, "step": 2412 }, { "epoch": 0.19791954906482193, "grad_norm": 0.6837654113769531, "learning_rate": 9.414370188650231e-05, "loss": 3.2308, "step": 2414 }, { "epoch": 0.19808352549321034, "grad_norm": 0.7141901850700378, "learning_rate": 9.413096282657538e-05, "loss": 3.1775, "step": 2416 }, { "epoch": 0.19824750192159876, "grad_norm": 0.7015743851661682, "learning_rate": 9.411821079004258e-05, "loss": 3.1921, "step": 2418 }, { "epoch": 0.19841147834998718, "grad_norm": 0.5208891034126282, "learning_rate": 9.410544578065358e-05, "loss": 3.1988, "step": 2420 }, { "epoch": 0.1985754547783756, "grad_norm": 0.6202064752578735, "learning_rate": 9.409266780216191e-05, "loss": 3.1672, "step": 2422 }, { "epoch": 0.19873943120676402, "grad_norm": 0.670427143573761, "learning_rate": 9.407987685832493e-05, "loss": 3.2011, "step": 2424 }, { "epoch": 0.19890340763515243, "grad_norm": 0.6327905058860779, "learning_rate": 9.406707295290377e-05, "loss": 3.216, "step": 2426 }, { "epoch": 0.19906738406354085, "grad_norm": 0.6474474668502808, "learning_rate": 9.405425608966338e-05, "loss": 3.2229, "step": 2428 }, { "epoch": 0.19923136049192927, "grad_norm": 0.7056578993797302, "learning_rate": 9.404142627237255e-05, "loss": 3.2508, "step": 2430 }, { "epoch": 0.19939533692031772, "grad_norm": 0.5632738471031189, "learning_rate": 9.402858350480383e-05, "loss": 3.1891, "step": 2432 }, { "epoch": 0.19955931334870614, "grad_norm": 0.5663987994194031, "learning_rate": 9.401572779073363e-05, "loss": 3.1317, "step": 2434 }, { "epoch": 0.19972328977709455, "grad_norm": 0.6044129729270935, "learning_rate": 9.400285913394213e-05, "loss": 3.1676, "step": 2436 }, { "epoch": 0.19988726620548297, "grad_norm": 0.5997135639190674, "learning_rate": 9.398997753821334e-05, "loss": 3.2122, "step": 2438 }, { "epoch": 0.2000512426338714, "grad_norm": 0.5731354355812073, "learning_rate": 9.397708300733503e-05, "loss": 3.146, "step": 2440 }, { "epoch": 0.2002152190622598, "grad_norm": 0.5749174356460571, "learning_rate": 9.396417554509885e-05, "loss": 3.2533, "step": 2442 }, { "epoch": 0.20037919549064823, "grad_norm": 0.6123077869415283, "learning_rate": 9.395125515530019e-05, "loss": 3.2046, "step": 2444 }, { "epoch": 0.20054317191903664, "grad_norm": 0.5596455335617065, "learning_rate": 9.393832184173826e-05, "loss": 3.2109, "step": 2446 }, { "epoch": 0.20070714834742506, "grad_norm": 0.6029247641563416, "learning_rate": 9.392537560821606e-05, "loss": 3.2487, "step": 2448 }, { "epoch": 0.20087112477581348, "grad_norm": 0.5740619897842407, "learning_rate": 9.391241645854041e-05, "loss": 3.1954, "step": 2450 }, { "epoch": 0.2010351012042019, "grad_norm": 0.4807377755641937, "learning_rate": 9.389944439652194e-05, "loss": 3.232, "step": 2452 }, { "epoch": 0.20119907763259032, "grad_norm": 0.4954237937927246, "learning_rate": 9.388645942597501e-05, "loss": 3.1604, "step": 2454 }, { "epoch": 0.20136305406097874, "grad_norm": 0.5956186652183533, "learning_rate": 9.387346155071785e-05, "loss": 3.2222, "step": 2456 }, { "epoch": 0.20152703048936715, "grad_norm": 0.5427073240280151, "learning_rate": 9.386045077457244e-05, "loss": 3.199, "step": 2458 }, { "epoch": 0.20169100691775557, "grad_norm": 0.6691755056381226, "learning_rate": 9.384742710136458e-05, "loss": 3.1441, "step": 2460 }, { "epoch": 0.201854983346144, "grad_norm": 0.5961546301841736, "learning_rate": 9.383439053492384e-05, "loss": 3.2128, "step": 2462 }, { "epoch": 0.2020189597745324, "grad_norm": 0.627190887928009, "learning_rate": 9.38213410790836e-05, "loss": 3.2204, "step": 2464 }, { "epoch": 0.20218293620292083, "grad_norm": 0.6342671513557434, "learning_rate": 9.380827873768101e-05, "loss": 3.1279, "step": 2466 }, { "epoch": 0.20234691263130924, "grad_norm": 0.5942181944847107, "learning_rate": 9.379520351455705e-05, "loss": 3.204, "step": 2468 }, { "epoch": 0.20251088905969766, "grad_norm": 0.5538034439086914, "learning_rate": 9.378211541355643e-05, "loss": 3.2224, "step": 2470 }, { "epoch": 0.20267486548808608, "grad_norm": 0.5361983776092529, "learning_rate": 9.376901443852767e-05, "loss": 3.1103, "step": 2472 }, { "epoch": 0.2028388419164745, "grad_norm": 0.6236636638641357, "learning_rate": 9.375590059332311e-05, "loss": 3.2047, "step": 2474 }, { "epoch": 0.20300281834486292, "grad_norm": 0.5351163148880005, "learning_rate": 9.374277388179882e-05, "loss": 3.2153, "step": 2476 }, { "epoch": 0.20316679477325134, "grad_norm": 0.5665524005889893, "learning_rate": 9.37296343078147e-05, "loss": 3.1779, "step": 2478 }, { "epoch": 0.20333077120163975, "grad_norm": 0.5840953588485718, "learning_rate": 9.371648187523439e-05, "loss": 3.1732, "step": 2480 }, { "epoch": 0.20349474763002817, "grad_norm": 0.5002064108848572, "learning_rate": 9.370331658792534e-05, "loss": 3.1473, "step": 2482 }, { "epoch": 0.2036587240584166, "grad_norm": 0.5152116417884827, "learning_rate": 9.369013844975878e-05, "loss": 3.1973, "step": 2484 }, { "epoch": 0.203822700486805, "grad_norm": 0.5382422208786011, "learning_rate": 9.367694746460969e-05, "loss": 3.1354, "step": 2486 }, { "epoch": 0.20398667691519345, "grad_norm": 0.5114589929580688, "learning_rate": 9.366374363635688e-05, "loss": 3.1416, "step": 2488 }, { "epoch": 0.20415065334358187, "grad_norm": 0.5633603930473328, "learning_rate": 9.365052696888288e-05, "loss": 3.1328, "step": 2490 }, { "epoch": 0.2043146297719703, "grad_norm": 0.6122515201568604, "learning_rate": 9.363729746607401e-05, "loss": 3.2152, "step": 2492 }, { "epoch": 0.2044786062003587, "grad_norm": 0.49608293175697327, "learning_rate": 9.36240551318204e-05, "loss": 3.1354, "step": 2494 }, { "epoch": 0.20464258262874713, "grad_norm": 0.6546223759651184, "learning_rate": 9.361079997001592e-05, "loss": 3.2126, "step": 2496 }, { "epoch": 0.20480655905713555, "grad_norm": 0.6353023648262024, "learning_rate": 9.359753198455823e-05, "loss": 3.1782, "step": 2498 }, { "epoch": 0.20497053548552396, "grad_norm": 0.5790070295333862, "learning_rate": 9.358425117934873e-05, "loss": 3.1635, "step": 2500 }, { "epoch": 0.20513451191391238, "grad_norm": 0.5316998958587646, "learning_rate": 9.357095755829259e-05, "loss": 3.1684, "step": 2502 }, { "epoch": 0.2052984883423008, "grad_norm": 0.5418469905853271, "learning_rate": 9.355765112529882e-05, "loss": 3.2322, "step": 2504 }, { "epoch": 0.20546246477068922, "grad_norm": 0.5909755229949951, "learning_rate": 9.35443318842801e-05, "loss": 3.1637, "step": 2506 }, { "epoch": 0.20562644119907764, "grad_norm": 0.5913002490997314, "learning_rate": 9.353099983915298e-05, "loss": 3.1568, "step": 2508 }, { "epoch": 0.20579041762746605, "grad_norm": 0.5578615665435791, "learning_rate": 9.351765499383764e-05, "loss": 3.1382, "step": 2510 }, { "epoch": 0.20595439405585447, "grad_norm": 0.5887861251831055, "learning_rate": 9.350429735225816e-05, "loss": 3.1946, "step": 2512 }, { "epoch": 0.2061183704842429, "grad_norm": 0.5453567504882812, "learning_rate": 9.34909269183423e-05, "loss": 3.1474, "step": 2514 }, { "epoch": 0.2062823469126313, "grad_norm": 0.5504783987998962, "learning_rate": 9.34775436960216e-05, "loss": 3.1371, "step": 2516 }, { "epoch": 0.20644632334101973, "grad_norm": 0.5522333979606628, "learning_rate": 9.346414768923138e-05, "loss": 3.1736, "step": 2518 }, { "epoch": 0.20661029976940815, "grad_norm": 0.5545846223831177, "learning_rate": 9.345073890191067e-05, "loss": 3.155, "step": 2520 }, { "epoch": 0.20677427619779656, "grad_norm": 0.5755533576011658, "learning_rate": 9.343731733800235e-05, "loss": 3.1211, "step": 2522 }, { "epoch": 0.20693825262618498, "grad_norm": 0.6164469122886658, "learning_rate": 9.342388300145294e-05, "loss": 3.1781, "step": 2524 }, { "epoch": 0.2071022290545734, "grad_norm": 0.5937029123306274, "learning_rate": 9.341043589621282e-05, "loss": 3.2039, "step": 2526 }, { "epoch": 0.20726620548296182, "grad_norm": 0.5489475727081299, "learning_rate": 9.339697602623605e-05, "loss": 3.1502, "step": 2528 }, { "epoch": 0.20743018191135024, "grad_norm": 0.6091250777244568, "learning_rate": 9.338350339548048e-05, "loss": 3.1774, "step": 2530 }, { "epoch": 0.20759415833973865, "grad_norm": 0.5674654841423035, "learning_rate": 9.337001800790773e-05, "loss": 3.1535, "step": 2532 }, { "epoch": 0.20775813476812707, "grad_norm": 0.644279420375824, "learning_rate": 9.33565198674831e-05, "loss": 3.1406, "step": 2534 }, { "epoch": 0.2079221111965155, "grad_norm": 0.6195595264434814, "learning_rate": 9.334300897817574e-05, "loss": 3.1527, "step": 2536 }, { "epoch": 0.2080860876249039, "grad_norm": 0.5304683446884155, "learning_rate": 9.332948534395846e-05, "loss": 3.1957, "step": 2538 }, { "epoch": 0.20825006405329233, "grad_norm": 0.6691213250160217, "learning_rate": 9.331594896880787e-05, "loss": 3.2468, "step": 2540 }, { "epoch": 0.20841404048168075, "grad_norm": 0.5579569339752197, "learning_rate": 9.330239985670427e-05, "loss": 3.1475, "step": 2542 }, { "epoch": 0.20857801691006916, "grad_norm": 0.6016284227371216, "learning_rate": 9.328883801163181e-05, "loss": 3.1298, "step": 2544 }, { "epoch": 0.2087419933384576, "grad_norm": 0.5903862714767456, "learning_rate": 9.327526343757826e-05, "loss": 3.1804, "step": 2546 }, { "epoch": 0.20890596976684603, "grad_norm": 0.5137822031974792, "learning_rate": 9.326167613853523e-05, "loss": 3.1662, "step": 2548 }, { "epoch": 0.20906994619523445, "grad_norm": 0.5315471887588501, "learning_rate": 9.324807611849802e-05, "loss": 3.2222, "step": 2550 }, { "epoch": 0.20923392262362286, "grad_norm": 0.5678295493125916, "learning_rate": 9.323446338146568e-05, "loss": 3.147, "step": 2552 }, { "epoch": 0.20939789905201128, "grad_norm": 0.5671442151069641, "learning_rate": 9.322083793144101e-05, "loss": 3.1514, "step": 2554 }, { "epoch": 0.2095618754803997, "grad_norm": 0.5480635166168213, "learning_rate": 9.320719977243052e-05, "loss": 3.1943, "step": 2556 }, { "epoch": 0.20972585190878812, "grad_norm": 0.573996901512146, "learning_rate": 9.319354890844451e-05, "loss": 3.1084, "step": 2558 }, { "epoch": 0.20988982833717654, "grad_norm": 0.5476592183113098, "learning_rate": 9.317988534349697e-05, "loss": 3.1328, "step": 2560 }, { "epoch": 0.21005380476556496, "grad_norm": 0.5603650808334351, "learning_rate": 9.316620908160562e-05, "loss": 3.256, "step": 2562 }, { "epoch": 0.21021778119395337, "grad_norm": 0.5470094680786133, "learning_rate": 9.315252012679198e-05, "loss": 3.2453, "step": 2564 }, { "epoch": 0.2103817576223418, "grad_norm": 0.5147728323936462, "learning_rate": 9.313881848308123e-05, "loss": 3.2067, "step": 2566 }, { "epoch": 0.2105457340507302, "grad_norm": 0.5771604776382446, "learning_rate": 9.312510415450228e-05, "loss": 3.1415, "step": 2568 }, { "epoch": 0.21070971047911863, "grad_norm": 0.5814144015312195, "learning_rate": 9.311137714508785e-05, "loss": 3.1481, "step": 2570 }, { "epoch": 0.21087368690750705, "grad_norm": 0.589153528213501, "learning_rate": 9.309763745887428e-05, "loss": 3.1297, "step": 2572 }, { "epoch": 0.21103766333589546, "grad_norm": 0.5519060492515564, "learning_rate": 9.308388509990171e-05, "loss": 3.1409, "step": 2574 }, { "epoch": 0.21120163976428388, "grad_norm": 0.5374418497085571, "learning_rate": 9.307012007221401e-05, "loss": 3.2072, "step": 2576 }, { "epoch": 0.2113656161926723, "grad_norm": 0.5539153218269348, "learning_rate": 9.305634237985874e-05, "loss": 3.1928, "step": 2578 }, { "epoch": 0.21152959262106072, "grad_norm": 0.47456660866737366, "learning_rate": 9.304255202688721e-05, "loss": 3.1669, "step": 2580 }, { "epoch": 0.21169356904944914, "grad_norm": 0.5018851161003113, "learning_rate": 9.302874901735441e-05, "loss": 3.1694, "step": 2582 }, { "epoch": 0.21185754547783756, "grad_norm": 0.5462913513183594, "learning_rate": 9.301493335531911e-05, "loss": 3.1753, "step": 2584 }, { "epoch": 0.21202152190622597, "grad_norm": 0.5394952297210693, "learning_rate": 9.300110504484377e-05, "loss": 3.1824, "step": 2586 }, { "epoch": 0.2121854983346144, "grad_norm": 0.6784063577651978, "learning_rate": 9.298726408999455e-05, "loss": 3.1717, "step": 2588 }, { "epoch": 0.2123494747630028, "grad_norm": 0.5093061923980713, "learning_rate": 9.297341049484139e-05, "loss": 3.1197, "step": 2590 }, { "epoch": 0.21251345119139123, "grad_norm": 0.5276237726211548, "learning_rate": 9.295954426345786e-05, "loss": 3.1307, "step": 2592 }, { "epoch": 0.21267742761977965, "grad_norm": 0.6057010293006897, "learning_rate": 9.294566539992132e-05, "loss": 3.1619, "step": 2594 }, { "epoch": 0.21284140404816806, "grad_norm": 0.6017722487449646, "learning_rate": 9.293177390831282e-05, "loss": 3.1196, "step": 2596 }, { "epoch": 0.21300538047655648, "grad_norm": 0.5458320379257202, "learning_rate": 9.291786979271712e-05, "loss": 3.1665, "step": 2598 }, { "epoch": 0.2131693569049449, "grad_norm": 0.6224083304405212, "learning_rate": 9.290395305722269e-05, "loss": 3.1768, "step": 2600 }, { "epoch": 0.21333333333333335, "grad_norm": 0.5466166138648987, "learning_rate": 9.289002370592168e-05, "loss": 3.135, "step": 2602 }, { "epoch": 0.21349730976172177, "grad_norm": 0.6690223217010498, "learning_rate": 9.287608174291004e-05, "loss": 3.2039, "step": 2604 }, { "epoch": 0.21366128619011018, "grad_norm": 0.6225460767745972, "learning_rate": 9.286212717228734e-05, "loss": 3.1377, "step": 2606 }, { "epoch": 0.2138252626184986, "grad_norm": 0.6038724184036255, "learning_rate": 9.28481599981569e-05, "loss": 3.1148, "step": 2608 }, { "epoch": 0.21398923904688702, "grad_norm": 0.5335320830345154, "learning_rate": 9.283418022462571e-05, "loss": 3.1726, "step": 2610 }, { "epoch": 0.21415321547527544, "grad_norm": 0.5151216387748718, "learning_rate": 9.282018785580452e-05, "loss": 3.1839, "step": 2612 }, { "epoch": 0.21431719190366386, "grad_norm": 0.5432109236717224, "learning_rate": 9.280618289580773e-05, "loss": 3.1388, "step": 2614 }, { "epoch": 0.21448116833205227, "grad_norm": 0.4836788475513458, "learning_rate": 9.27921653487535e-05, "loss": 3.133, "step": 2616 }, { "epoch": 0.2146451447604407, "grad_norm": 0.5969836115837097, "learning_rate": 9.277813521876361e-05, "loss": 3.1967, "step": 2618 }, { "epoch": 0.2148091211888291, "grad_norm": 0.5812448859214783, "learning_rate": 9.276409250996362e-05, "loss": 3.0804, "step": 2620 }, { "epoch": 0.21497309761721753, "grad_norm": 0.6391003131866455, "learning_rate": 9.275003722648274e-05, "loss": 3.1616, "step": 2622 }, { "epoch": 0.21513707404560595, "grad_norm": 0.6133304834365845, "learning_rate": 9.27359693724539e-05, "loss": 3.1099, "step": 2624 }, { "epoch": 0.21530105047399437, "grad_norm": 0.7073734402656555, "learning_rate": 9.272188895201372e-05, "loss": 3.2123, "step": 2626 }, { "epoch": 0.21546502690238278, "grad_norm": 0.6187078952789307, "learning_rate": 9.270779596930252e-05, "loss": 3.1732, "step": 2628 }, { "epoch": 0.2156290033307712, "grad_norm": 0.5692609548568726, "learning_rate": 9.269369042846428e-05, "loss": 3.1112, "step": 2630 }, { "epoch": 0.21579297975915962, "grad_norm": 0.6214010715484619, "learning_rate": 9.267957233364674e-05, "loss": 3.1889, "step": 2632 }, { "epoch": 0.21595695618754804, "grad_norm": 0.575520932674408, "learning_rate": 9.266544168900126e-05, "loss": 3.1127, "step": 2634 }, { "epoch": 0.21612093261593646, "grad_norm": 0.5261242985725403, "learning_rate": 9.265129849868294e-05, "loss": 3.1123, "step": 2636 }, { "epoch": 0.21628490904432487, "grad_norm": 0.5849714279174805, "learning_rate": 9.263714276685056e-05, "loss": 3.1153, "step": 2638 }, { "epoch": 0.2164488854727133, "grad_norm": 0.563801109790802, "learning_rate": 9.262297449766657e-05, "loss": 3.1947, "step": 2640 }, { "epoch": 0.2166128619011017, "grad_norm": 0.5794183611869812, "learning_rate": 9.260879369529711e-05, "loss": 3.1205, "step": 2642 }, { "epoch": 0.21677683832949013, "grad_norm": 0.6427027583122253, "learning_rate": 9.259460036391201e-05, "loss": 3.1642, "step": 2644 }, { "epoch": 0.21694081475787855, "grad_norm": 0.5406637191772461, "learning_rate": 9.25803945076848e-05, "loss": 3.1695, "step": 2646 }, { "epoch": 0.21710479118626697, "grad_norm": 0.5383151769638062, "learning_rate": 9.256617613079267e-05, "loss": 3.0954, "step": 2648 }, { "epoch": 0.21726876761465538, "grad_norm": 0.6771288514137268, "learning_rate": 9.25519452374165e-05, "loss": 3.1934, "step": 2650 }, { "epoch": 0.2174327440430438, "grad_norm": 0.639716386795044, "learning_rate": 9.253770183174085e-05, "loss": 3.1192, "step": 2652 }, { "epoch": 0.21759672047143222, "grad_norm": 0.5584697127342224, "learning_rate": 9.252344591795396e-05, "loss": 3.2127, "step": 2654 }, { "epoch": 0.21776069689982064, "grad_norm": 0.5381549000740051, "learning_rate": 9.250917750024777e-05, "loss": 3.1321, "step": 2656 }, { "epoch": 0.21792467332820906, "grad_norm": 0.5931708216667175, "learning_rate": 9.249489658281783e-05, "loss": 3.1762, "step": 2658 }, { "epoch": 0.2180886497565975, "grad_norm": 0.5969710946083069, "learning_rate": 9.248060316986344e-05, "loss": 3.1158, "step": 2660 }, { "epoch": 0.21825262618498592, "grad_norm": 0.6363986134529114, "learning_rate": 9.246629726558756e-05, "loss": 3.242, "step": 2662 }, { "epoch": 0.21841660261337434, "grad_norm": 0.540717363357544, "learning_rate": 9.245197887419676e-05, "loss": 3.1661, "step": 2664 }, { "epoch": 0.21858057904176276, "grad_norm": 0.589412271976471, "learning_rate": 9.243764799990136e-05, "loss": 3.1018, "step": 2666 }, { "epoch": 0.21874455547015118, "grad_norm": 0.5175191164016724, "learning_rate": 9.242330464691533e-05, "loss": 3.1773, "step": 2668 }, { "epoch": 0.2189085318985396, "grad_norm": 0.5175068974494934, "learning_rate": 9.240894881945627e-05, "loss": 3.1566, "step": 2670 }, { "epoch": 0.219072508326928, "grad_norm": 0.48125573992729187, "learning_rate": 9.239458052174551e-05, "loss": 3.106, "step": 2672 }, { "epoch": 0.21923648475531643, "grad_norm": 0.6103034019470215, "learning_rate": 9.238019975800799e-05, "loss": 3.1316, "step": 2674 }, { "epoch": 0.21940046118370485, "grad_norm": 0.5315214991569519, "learning_rate": 9.236580653247235e-05, "loss": 3.0961, "step": 2676 }, { "epoch": 0.21956443761209327, "grad_norm": 0.5600281357765198, "learning_rate": 9.235140084937086e-05, "loss": 3.0966, "step": 2678 }, { "epoch": 0.21972841404048168, "grad_norm": 0.5466108322143555, "learning_rate": 9.233698271293953e-05, "loss": 3.1325, "step": 2680 }, { "epoch": 0.2198923904688701, "grad_norm": 0.5320989489555359, "learning_rate": 9.232255212741792e-05, "loss": 3.1617, "step": 2682 }, { "epoch": 0.22005636689725852, "grad_norm": 0.4955200254917145, "learning_rate": 9.230810909704934e-05, "loss": 3.1351, "step": 2684 }, { "epoch": 0.22022034332564694, "grad_norm": 0.5174024105072021, "learning_rate": 9.229365362608074e-05, "loss": 3.1993, "step": 2686 }, { "epoch": 0.22038431975403536, "grad_norm": 0.576806366443634, "learning_rate": 9.22791857187627e-05, "loss": 3.1945, "step": 2688 }, { "epoch": 0.22054829618242378, "grad_norm": 0.6114248633384705, "learning_rate": 9.226470537934948e-05, "loss": 3.1272, "step": 2690 }, { "epoch": 0.2207122726108122, "grad_norm": 0.6304234266281128, "learning_rate": 9.225021261209898e-05, "loss": 3.1276, "step": 2692 }, { "epoch": 0.2208762490392006, "grad_norm": 0.5603763461112976, "learning_rate": 9.223570742127278e-05, "loss": 3.068, "step": 2694 }, { "epoch": 0.22104022546758903, "grad_norm": 0.5506424307823181, "learning_rate": 9.222118981113607e-05, "loss": 3.1429, "step": 2696 }, { "epoch": 0.22120420189597745, "grad_norm": 0.5758050084114075, "learning_rate": 9.220665978595775e-05, "loss": 3.137, "step": 2698 }, { "epoch": 0.22136817832436587, "grad_norm": 0.5011979341506958, "learning_rate": 9.219211735001034e-05, "loss": 3.1048, "step": 2700 }, { "epoch": 0.22153215475275428, "grad_norm": 0.65125972032547, "learning_rate": 9.217756250756996e-05, "loss": 3.1607, "step": 2702 }, { "epoch": 0.2216961311811427, "grad_norm": 0.5995632410049438, "learning_rate": 9.21629952629165e-05, "loss": 3.055, "step": 2704 }, { "epoch": 0.22186010760953112, "grad_norm": 0.7397205233573914, "learning_rate": 9.214841562033338e-05, "loss": 3.1355, "step": 2706 }, { "epoch": 0.22202408403791954, "grad_norm": 0.636867880821228, "learning_rate": 9.213382358410771e-05, "loss": 3.158, "step": 2708 }, { "epoch": 0.22218806046630796, "grad_norm": 0.6258612275123596, "learning_rate": 9.211921915853026e-05, "loss": 3.1435, "step": 2710 }, { "epoch": 0.22235203689469638, "grad_norm": 0.6227878332138062, "learning_rate": 9.210460234789542e-05, "loss": 3.115, "step": 2712 }, { "epoch": 0.2225160133230848, "grad_norm": 0.552367091178894, "learning_rate": 9.20899731565012e-05, "loss": 3.1096, "step": 2714 }, { "epoch": 0.22267998975147324, "grad_norm": 0.5357968211174011, "learning_rate": 9.207533158864934e-05, "loss": 3.1431, "step": 2716 }, { "epoch": 0.22284396617986166, "grad_norm": 0.5799859166145325, "learning_rate": 9.206067764864512e-05, "loss": 3.1239, "step": 2718 }, { "epoch": 0.22300794260825008, "grad_norm": 0.541117787361145, "learning_rate": 9.204601134079749e-05, "loss": 3.1551, "step": 2720 }, { "epoch": 0.2231719190366385, "grad_norm": 0.5454208254814148, "learning_rate": 9.203133266941906e-05, "loss": 3.1585, "step": 2722 }, { "epoch": 0.2233358954650269, "grad_norm": 0.6066946983337402, "learning_rate": 9.201664163882605e-05, "loss": 3.1415, "step": 2724 }, { "epoch": 0.22349987189341533, "grad_norm": 0.5827730298042297, "learning_rate": 9.200193825333833e-05, "loss": 3.0677, "step": 2726 }, { "epoch": 0.22366384832180375, "grad_norm": 0.6678103804588318, "learning_rate": 9.198722251727941e-05, "loss": 3.1344, "step": 2728 }, { "epoch": 0.22382782475019217, "grad_norm": 0.553477942943573, "learning_rate": 9.197249443497638e-05, "loss": 3.1403, "step": 2730 }, { "epoch": 0.22399180117858059, "grad_norm": 0.6305515766143799, "learning_rate": 9.195775401076001e-05, "loss": 3.1172, "step": 2732 }, { "epoch": 0.224155777606969, "grad_norm": 0.6065593957901001, "learning_rate": 9.194300124896471e-05, "loss": 3.1931, "step": 2734 }, { "epoch": 0.22431975403535742, "grad_norm": 0.5823774933815002, "learning_rate": 9.192823615392848e-05, "loss": 3.1133, "step": 2736 }, { "epoch": 0.22448373046374584, "grad_norm": 0.5522893667221069, "learning_rate": 9.191345872999297e-05, "loss": 3.111, "step": 2738 }, { "epoch": 0.22464770689213426, "grad_norm": 0.5573318600654602, "learning_rate": 9.189866898150343e-05, "loss": 3.1446, "step": 2740 }, { "epoch": 0.22481168332052268, "grad_norm": 0.5337832570075989, "learning_rate": 9.188386691280875e-05, "loss": 3.1285, "step": 2742 }, { "epoch": 0.2249756597489111, "grad_norm": 0.4911380112171173, "learning_rate": 9.186905252826147e-05, "loss": 3.0945, "step": 2744 }, { "epoch": 0.2251396361772995, "grad_norm": 0.546593189239502, "learning_rate": 9.18542258322177e-05, "loss": 3.1581, "step": 2746 }, { "epoch": 0.22530361260568793, "grad_norm": 0.5859017372131348, "learning_rate": 9.183938682903721e-05, "loss": 3.1265, "step": 2748 }, { "epoch": 0.22546758903407635, "grad_norm": 0.610236406326294, "learning_rate": 9.182453552308335e-05, "loss": 3.1202, "step": 2750 }, { "epoch": 0.22563156546246477, "grad_norm": 0.521246075630188, "learning_rate": 9.180967191872315e-05, "loss": 3.1225, "step": 2752 }, { "epoch": 0.22579554189085319, "grad_norm": 0.5413455367088318, "learning_rate": 9.179479602032719e-05, "loss": 3.1589, "step": 2754 }, { "epoch": 0.2259595183192416, "grad_norm": 0.5279058814048767, "learning_rate": 9.177990783226969e-05, "loss": 3.1119, "step": 2756 }, { "epoch": 0.22612349474763002, "grad_norm": 0.5241829752922058, "learning_rate": 9.17650073589285e-05, "loss": 3.0924, "step": 2758 }, { "epoch": 0.22628747117601844, "grad_norm": 0.5258579850196838, "learning_rate": 9.175009460468507e-05, "loss": 3.1179, "step": 2760 }, { "epoch": 0.22645144760440686, "grad_norm": 0.5945084095001221, "learning_rate": 9.173516957392446e-05, "loss": 3.1142, "step": 2762 }, { "epoch": 0.22661542403279528, "grad_norm": 0.49606871604919434, "learning_rate": 9.172023227103533e-05, "loss": 3.1269, "step": 2764 }, { "epoch": 0.2267794004611837, "grad_norm": 0.5690228343009949, "learning_rate": 9.170528270040996e-05, "loss": 3.1252, "step": 2766 }, { "epoch": 0.2269433768895721, "grad_norm": 0.5027005672454834, "learning_rate": 9.169032086644425e-05, "loss": 3.1549, "step": 2768 }, { "epoch": 0.22710735331796053, "grad_norm": 0.5609970092773438, "learning_rate": 9.167534677353768e-05, "loss": 3.1085, "step": 2770 }, { "epoch": 0.22727132974634895, "grad_norm": 0.5917537212371826, "learning_rate": 9.166036042609336e-05, "loss": 3.1457, "step": 2772 }, { "epoch": 0.2274353061747374, "grad_norm": 0.4679684042930603, "learning_rate": 9.164536182851797e-05, "loss": 3.1672, "step": 2774 }, { "epoch": 0.2275992826031258, "grad_norm": 0.5247348546981812, "learning_rate": 9.163035098522182e-05, "loss": 3.0683, "step": 2776 }, { "epoch": 0.22776325903151423, "grad_norm": 0.5608956813812256, "learning_rate": 9.161532790061882e-05, "loss": 3.093, "step": 2778 }, { "epoch": 0.22792723545990265, "grad_norm": 0.6004567742347717, "learning_rate": 9.160029257912646e-05, "loss": 3.0853, "step": 2780 }, { "epoch": 0.22809121188829107, "grad_norm": 0.4759785532951355, "learning_rate": 9.158524502516586e-05, "loss": 3.1141, "step": 2782 }, { "epoch": 0.22825518831667949, "grad_norm": 0.5630537867546082, "learning_rate": 9.15701852431617e-05, "loss": 3.112, "step": 2784 }, { "epoch": 0.2284191647450679, "grad_norm": 0.5082862377166748, "learning_rate": 9.15551132375423e-05, "loss": 3.0943, "step": 2786 }, { "epoch": 0.22858314117345632, "grad_norm": 0.5193647146224976, "learning_rate": 9.15400290127395e-05, "loss": 3.1019, "step": 2788 }, { "epoch": 0.22874711760184474, "grad_norm": 0.604705274105072, "learning_rate": 9.152493257318882e-05, "loss": 3.0763, "step": 2790 }, { "epoch": 0.22891109403023316, "grad_norm": 0.6304996013641357, "learning_rate": 9.150982392332932e-05, "loss": 3.1561, "step": 2792 }, { "epoch": 0.22907507045862158, "grad_norm": 0.6620004177093506, "learning_rate": 9.149470306760368e-05, "loss": 3.1256, "step": 2794 }, { "epoch": 0.22923904688701, "grad_norm": 0.5921337604522705, "learning_rate": 9.147957001045813e-05, "loss": 3.14, "step": 2796 }, { "epoch": 0.2294030233153984, "grad_norm": 0.5974056720733643, "learning_rate": 9.146442475634252e-05, "loss": 3.1025, "step": 2798 }, { "epoch": 0.22956699974378683, "grad_norm": 0.5777150988578796, "learning_rate": 9.144926730971027e-05, "loss": 3.0568, "step": 2800 }, { "epoch": 0.22973097617217525, "grad_norm": 0.5552829504013062, "learning_rate": 9.143409767501839e-05, "loss": 3.1045, "step": 2802 }, { "epoch": 0.22989495260056367, "grad_norm": 0.5311617255210876, "learning_rate": 9.141891585672748e-05, "loss": 3.1206, "step": 2804 }, { "epoch": 0.23005892902895209, "grad_norm": 0.5451520681381226, "learning_rate": 9.140372185930172e-05, "loss": 3.1293, "step": 2806 }, { "epoch": 0.2302229054573405, "grad_norm": 0.5763382315635681, "learning_rate": 9.138851568720886e-05, "loss": 3.1129, "step": 2808 }, { "epoch": 0.23038688188572892, "grad_norm": 0.5894972681999207, "learning_rate": 9.137329734492026e-05, "loss": 3.0796, "step": 2810 }, { "epoch": 0.23055085831411734, "grad_norm": 0.538650631904602, "learning_rate": 9.135806683691082e-05, "loss": 3.09, "step": 2812 }, { "epoch": 0.23071483474250576, "grad_norm": 0.5035321116447449, "learning_rate": 9.134282416765905e-05, "loss": 3.1168, "step": 2814 }, { "epoch": 0.23087881117089418, "grad_norm": 0.5038688778877258, "learning_rate": 9.132756934164699e-05, "loss": 3.1212, "step": 2816 }, { "epoch": 0.2310427875992826, "grad_norm": 0.53536057472229, "learning_rate": 9.131230236336032e-05, "loss": 3.118, "step": 2818 }, { "epoch": 0.231206764027671, "grad_norm": 0.5447813272476196, "learning_rate": 9.129702323728824e-05, "loss": 3.0866, "step": 2820 }, { "epoch": 0.23137074045605943, "grad_norm": 0.49734705686569214, "learning_rate": 9.128173196792355e-05, "loss": 3.1489, "step": 2822 }, { "epoch": 0.23153471688444785, "grad_norm": 0.5162733793258667, "learning_rate": 9.12664285597626e-05, "loss": 3.166, "step": 2824 }, { "epoch": 0.23169869331283627, "grad_norm": 0.47742417454719543, "learning_rate": 9.125111301730534e-05, "loss": 3.0757, "step": 2826 }, { "epoch": 0.2318626697412247, "grad_norm": 0.5511021018028259, "learning_rate": 9.123578534505525e-05, "loss": 3.1382, "step": 2828 }, { "epoch": 0.2320266461696131, "grad_norm": 0.6152271628379822, "learning_rate": 9.122044554751942e-05, "loss": 3.1326, "step": 2830 }, { "epoch": 0.23219062259800155, "grad_norm": 0.576244592666626, "learning_rate": 9.120509362920846e-05, "loss": 3.1151, "step": 2832 }, { "epoch": 0.23235459902638997, "grad_norm": 0.6472841501235962, "learning_rate": 9.118972959463656e-05, "loss": 3.1018, "step": 2834 }, { "epoch": 0.2325185754547784, "grad_norm": 0.5974353551864624, "learning_rate": 9.11743534483215e-05, "loss": 3.1377, "step": 2836 }, { "epoch": 0.2326825518831668, "grad_norm": 0.5625829696655273, "learning_rate": 9.115896519478458e-05, "loss": 3.1366, "step": 2838 }, { "epoch": 0.23284652831155522, "grad_norm": 0.6244992613792419, "learning_rate": 9.11435648385507e-05, "loss": 3.1386, "step": 2840 }, { "epoch": 0.23301050473994364, "grad_norm": 0.6011447310447693, "learning_rate": 9.11281523841483e-05, "loss": 3.1042, "step": 2842 }, { "epoch": 0.23317448116833206, "grad_norm": 0.6402640342712402, "learning_rate": 9.111272783610934e-05, "loss": 3.1381, "step": 2844 }, { "epoch": 0.23333845759672048, "grad_norm": 0.689268171787262, "learning_rate": 9.109729119896941e-05, "loss": 3.152, "step": 2846 }, { "epoch": 0.2335024340251089, "grad_norm": 0.6104257702827454, "learning_rate": 9.108184247726759e-05, "loss": 3.0882, "step": 2848 }, { "epoch": 0.23366641045349731, "grad_norm": 0.490530401468277, "learning_rate": 9.106638167554657e-05, "loss": 3.1375, "step": 2850 }, { "epoch": 0.23383038688188573, "grad_norm": 0.5417265892028809, "learning_rate": 9.105090879835254e-05, "loss": 3.0875, "step": 2852 }, { "epoch": 0.23399436331027415, "grad_norm": 0.5406416654586792, "learning_rate": 9.103542385023526e-05, "loss": 3.1689, "step": 2854 }, { "epoch": 0.23415833973866257, "grad_norm": 0.5073980093002319, "learning_rate": 9.101992683574805e-05, "loss": 3.1425, "step": 2856 }, { "epoch": 0.234322316167051, "grad_norm": 0.5920371413230896, "learning_rate": 9.100441775944779e-05, "loss": 3.1296, "step": 2858 }, { "epoch": 0.2344862925954394, "grad_norm": 0.4810742735862732, "learning_rate": 9.098889662589485e-05, "loss": 3.0661, "step": 2860 }, { "epoch": 0.23465026902382782, "grad_norm": 0.5148147344589233, "learning_rate": 9.097336343965321e-05, "loss": 3.0586, "step": 2862 }, { "epoch": 0.23481424545221624, "grad_norm": 0.5372908115386963, "learning_rate": 9.095781820529036e-05, "loss": 3.0886, "step": 2864 }, { "epoch": 0.23497822188060466, "grad_norm": 0.5466518402099609, "learning_rate": 9.094226092737734e-05, "loss": 3.1166, "step": 2866 }, { "epoch": 0.23514219830899308, "grad_norm": 0.6220472455024719, "learning_rate": 9.092669161048873e-05, "loss": 3.0579, "step": 2868 }, { "epoch": 0.2353061747373815, "grad_norm": 0.5313682556152344, "learning_rate": 9.091111025920266e-05, "loss": 3.0914, "step": 2870 }, { "epoch": 0.23547015116576991, "grad_norm": 0.5322121381759644, "learning_rate": 9.089551687810076e-05, "loss": 3.1197, "step": 2872 }, { "epoch": 0.23563412759415833, "grad_norm": 0.5401471257209778, "learning_rate": 9.087991147176827e-05, "loss": 3.0361, "step": 2874 }, { "epoch": 0.23579810402254675, "grad_norm": 0.5407954454421997, "learning_rate": 9.086429404479389e-05, "loss": 3.0943, "step": 2876 }, { "epoch": 0.23596208045093517, "grad_norm": 0.6507935523986816, "learning_rate": 9.084866460176991e-05, "loss": 3.1444, "step": 2878 }, { "epoch": 0.2361260568793236, "grad_norm": 0.621780276298523, "learning_rate": 9.08330231472921e-05, "loss": 3.1094, "step": 2880 }, { "epoch": 0.236290033307712, "grad_norm": 0.560219943523407, "learning_rate": 9.081736968595982e-05, "loss": 3.1433, "step": 2882 }, { "epoch": 0.23645400973610042, "grad_norm": 0.5493839979171753, "learning_rate": 9.080170422237593e-05, "loss": 3.0879, "step": 2884 }, { "epoch": 0.23661798616448884, "grad_norm": 0.4997730851173401, "learning_rate": 9.07860267611468e-05, "loss": 3.0705, "step": 2886 }, { "epoch": 0.2367819625928773, "grad_norm": 0.5000733137130737, "learning_rate": 9.077033730688239e-05, "loss": 3.0918, "step": 2888 }, { "epoch": 0.2369459390212657, "grad_norm": 0.595166027545929, "learning_rate": 9.075463586419613e-05, "loss": 3.1018, "step": 2890 }, { "epoch": 0.23710991544965412, "grad_norm": 0.5767890214920044, "learning_rate": 9.073892243770497e-05, "loss": 3.0718, "step": 2892 }, { "epoch": 0.23727389187804254, "grad_norm": 0.544654369354248, "learning_rate": 9.072319703202942e-05, "loss": 3.0892, "step": 2894 }, { "epoch": 0.23743786830643096, "grad_norm": 0.6508696675300598, "learning_rate": 9.070745965179353e-05, "loss": 3.1152, "step": 2896 }, { "epoch": 0.23760184473481938, "grad_norm": 0.6514599323272705, "learning_rate": 9.06917103016248e-05, "loss": 3.1539, "step": 2898 }, { "epoch": 0.2377658211632078, "grad_norm": 0.5342041254043579, "learning_rate": 9.06759489861543e-05, "loss": 3.0977, "step": 2900 }, { "epoch": 0.23792979759159621, "grad_norm": 0.5804237127304077, "learning_rate": 9.066017571001662e-05, "loss": 3.0767, "step": 2902 }, { "epoch": 0.23809377401998463, "grad_norm": 0.5167868137359619, "learning_rate": 9.064439047784982e-05, "loss": 3.0855, "step": 2904 }, { "epoch": 0.23825775044837305, "grad_norm": 0.5301772356033325, "learning_rate": 9.062859329429556e-05, "loss": 3.1147, "step": 2906 }, { "epoch": 0.23842172687676147, "grad_norm": 0.5427364110946655, "learning_rate": 9.061278416399895e-05, "loss": 3.1045, "step": 2908 }, { "epoch": 0.2385857033051499, "grad_norm": 0.5556970834732056, "learning_rate": 9.059696309160859e-05, "loss": 3.1235, "step": 2910 }, { "epoch": 0.2387496797335383, "grad_norm": 0.5361714363098145, "learning_rate": 9.058113008177667e-05, "loss": 3.1411, "step": 2912 }, { "epoch": 0.23891365616192672, "grad_norm": 0.7139540910720825, "learning_rate": 9.056528513915882e-05, "loss": 3.1739, "step": 2914 }, { "epoch": 0.23907763259031514, "grad_norm": 0.6499415040016174, "learning_rate": 9.054942826841427e-05, "loss": 3.0815, "step": 2916 }, { "epoch": 0.23924160901870356, "grad_norm": 0.6187708973884583, "learning_rate": 9.05335594742056e-05, "loss": 3.1197, "step": 2918 }, { "epoch": 0.23940558544709198, "grad_norm": 0.5267696976661682, "learning_rate": 9.051767876119906e-05, "loss": 3.1279, "step": 2920 }, { "epoch": 0.2395695618754804, "grad_norm": 0.5782443284988403, "learning_rate": 9.050178613406432e-05, "loss": 3.1206, "step": 2922 }, { "epoch": 0.23973353830386882, "grad_norm": 0.5910431742668152, "learning_rate": 9.048588159747457e-05, "loss": 3.11, "step": 2924 }, { "epoch": 0.23989751473225723, "grad_norm": 0.5470311641693115, "learning_rate": 9.046996515610649e-05, "loss": 3.0588, "step": 2926 }, { "epoch": 0.24006149116064565, "grad_norm": 0.6402561068534851, "learning_rate": 9.045403681464028e-05, "loss": 3.131, "step": 2928 }, { "epoch": 0.24022546758903407, "grad_norm": 0.5332674980163574, "learning_rate": 9.043809657775964e-05, "loss": 3.1398, "step": 2930 }, { "epoch": 0.2403894440174225, "grad_norm": 0.5881835222244263, "learning_rate": 9.042214445015176e-05, "loss": 3.1354, "step": 2932 }, { "epoch": 0.2405534204458109, "grad_norm": 0.6585120558738708, "learning_rate": 9.04061804365073e-05, "loss": 3.1273, "step": 2934 }, { "epoch": 0.24071739687419932, "grad_norm": 0.6403549313545227, "learning_rate": 9.039020454152047e-05, "loss": 3.1051, "step": 2936 }, { "epoch": 0.24088137330258774, "grad_norm": 0.60472571849823, "learning_rate": 9.037421676988893e-05, "loss": 3.1076, "step": 2938 }, { "epoch": 0.24104534973097616, "grad_norm": 0.5805239081382751, "learning_rate": 9.035821712631385e-05, "loss": 3.1201, "step": 2940 }, { "epoch": 0.24120932615936458, "grad_norm": 0.4733094274997711, "learning_rate": 9.034220561549988e-05, "loss": 3.127, "step": 2942 }, { "epoch": 0.241373302587753, "grad_norm": 0.6691949963569641, "learning_rate": 9.03261822421552e-05, "loss": 3.1115, "step": 2944 }, { "epoch": 0.24153727901614144, "grad_norm": 0.6166451573371887, "learning_rate": 9.031014701099139e-05, "loss": 3.1177, "step": 2946 }, { "epoch": 0.24170125544452986, "grad_norm": 0.7083244323730469, "learning_rate": 9.029409992672359e-05, "loss": 3.095, "step": 2948 }, { "epoch": 0.24186523187291828, "grad_norm": 0.6332703232765198, "learning_rate": 9.027804099407045e-05, "loss": 3.1122, "step": 2950 }, { "epoch": 0.2420292083013067, "grad_norm": 0.6068108081817627, "learning_rate": 9.026197021775402e-05, "loss": 3.0873, "step": 2952 }, { "epoch": 0.24219318472969512, "grad_norm": 0.5226258039474487, "learning_rate": 9.024588760249988e-05, "loss": 3.0131, "step": 2954 }, { "epoch": 0.24235716115808353, "grad_norm": 0.5775647163391113, "learning_rate": 9.02297931530371e-05, "loss": 3.115, "step": 2956 }, { "epoch": 0.24252113758647195, "grad_norm": 0.5021248459815979, "learning_rate": 9.021368687409819e-05, "loss": 3.0398, "step": 2958 }, { "epoch": 0.24268511401486037, "grad_norm": 0.5804054737091064, "learning_rate": 9.019756877041918e-05, "loss": 3.1158, "step": 2960 }, { "epoch": 0.2428490904432488, "grad_norm": 0.5360262989997864, "learning_rate": 9.018143884673957e-05, "loss": 3.1476, "step": 2962 }, { "epoch": 0.2430130668716372, "grad_norm": 0.5107494592666626, "learning_rate": 9.016529710780231e-05, "loss": 3.0919, "step": 2964 }, { "epoch": 0.24317704330002562, "grad_norm": 0.5997065305709839, "learning_rate": 9.014914355835384e-05, "loss": 3.08, "step": 2966 }, { "epoch": 0.24334101972841404, "grad_norm": 0.5437501668930054, "learning_rate": 9.013297820314408e-05, "loss": 3.1194, "step": 2968 }, { "epoch": 0.24350499615680246, "grad_norm": 0.5322654843330383, "learning_rate": 9.01168010469264e-05, "loss": 3.0669, "step": 2970 }, { "epoch": 0.24366897258519088, "grad_norm": 0.48639851808547974, "learning_rate": 9.010061209445769e-05, "loss": 3.1127, "step": 2972 }, { "epoch": 0.2438329490135793, "grad_norm": 0.4471394717693329, "learning_rate": 9.008441135049823e-05, "loss": 3.1262, "step": 2974 }, { "epoch": 0.24399692544196772, "grad_norm": 0.4837088882923126, "learning_rate": 9.006819881981184e-05, "loss": 3.0944, "step": 2976 }, { "epoch": 0.24416090187035613, "grad_norm": 0.4812915027141571, "learning_rate": 9.005197450716577e-05, "loss": 3.0651, "step": 2978 }, { "epoch": 0.24432487829874455, "grad_norm": 0.49380356073379517, "learning_rate": 9.003573841733075e-05, "loss": 3.1066, "step": 2980 }, { "epoch": 0.24448885472713297, "grad_norm": 0.49508631229400635, "learning_rate": 9.001949055508094e-05, "loss": 3.0422, "step": 2982 }, { "epoch": 0.2446528311555214, "grad_norm": 0.5182914137840271, "learning_rate": 9.0003230925194e-05, "loss": 3.1087, "step": 2984 }, { "epoch": 0.2448168075839098, "grad_norm": 0.5734208226203918, "learning_rate": 8.998695953245103e-05, "loss": 3.0798, "step": 2986 }, { "epoch": 0.24498078401229822, "grad_norm": 0.5737510919570923, "learning_rate": 8.99706763816366e-05, "loss": 3.1035, "step": 2988 }, { "epoch": 0.24514476044068664, "grad_norm": 0.5363655686378479, "learning_rate": 8.995438147753874e-05, "loss": 3.1577, "step": 2990 }, { "epoch": 0.24530873686907506, "grad_norm": 0.5472906231880188, "learning_rate": 8.993807482494892e-05, "loss": 3.1101, "step": 2992 }, { "epoch": 0.24547271329746348, "grad_norm": 0.5381254553794861, "learning_rate": 8.992175642866208e-05, "loss": 3.0842, "step": 2994 }, { "epoch": 0.2456366897258519, "grad_norm": 0.5339920520782471, "learning_rate": 8.990542629347658e-05, "loss": 3.0531, "step": 2996 }, { "epoch": 0.24580066615424032, "grad_norm": 0.5434836149215698, "learning_rate": 8.988908442419429e-05, "loss": 3.0259, "step": 2998 }, { "epoch": 0.24596464258262873, "grad_norm": 0.5441123247146606, "learning_rate": 8.987273082562048e-05, "loss": 3.0843, "step": 3000 }, { "epoch": 0.24612861901101718, "grad_norm": 0.5503421425819397, "learning_rate": 8.98563655025639e-05, "loss": 3.09, "step": 3002 }, { "epoch": 0.2462925954394056, "grad_norm": 0.521301805973053, "learning_rate": 8.983998845983672e-05, "loss": 3.0932, "step": 3004 }, { "epoch": 0.24645657186779402, "grad_norm": 0.47878655791282654, "learning_rate": 8.982359970225458e-05, "loss": 3.0644, "step": 3006 }, { "epoch": 0.24662054829618243, "grad_norm": 0.56011962890625, "learning_rate": 8.980719923463654e-05, "loss": 3.0469, "step": 3008 }, { "epoch": 0.24678452472457085, "grad_norm": 0.5374152064323425, "learning_rate": 8.979078706180515e-05, "loss": 3.0857, "step": 3010 }, { "epoch": 0.24694850115295927, "grad_norm": 0.524940013885498, "learning_rate": 8.977436318858635e-05, "loss": 3.046, "step": 3012 }, { "epoch": 0.2471124775813477, "grad_norm": 0.4905287027359009, "learning_rate": 8.975792761980954e-05, "loss": 3.0903, "step": 3014 }, { "epoch": 0.2472764540097361, "grad_norm": 0.5330361127853394, "learning_rate": 8.974148036030758e-05, "loss": 3.0994, "step": 3016 }, { "epoch": 0.24744043043812453, "grad_norm": 0.5203661322593689, "learning_rate": 8.972502141491673e-05, "loss": 3.0716, "step": 3018 }, { "epoch": 0.24760440686651294, "grad_norm": 0.5972068905830383, "learning_rate": 8.970855078847669e-05, "loss": 3.1228, "step": 3020 }, { "epoch": 0.24776838329490136, "grad_norm": 0.5131882429122925, "learning_rate": 8.969206848583062e-05, "loss": 3.0532, "step": 3022 }, { "epoch": 0.24793235972328978, "grad_norm": 0.5639394521713257, "learning_rate": 8.967557451182514e-05, "loss": 3.0587, "step": 3024 }, { "epoch": 0.2480963361516782, "grad_norm": 0.5577993392944336, "learning_rate": 8.965906887131022e-05, "loss": 3.0912, "step": 3026 }, { "epoch": 0.24826031258006662, "grad_norm": 0.5181630849838257, "learning_rate": 8.964255156913933e-05, "loss": 3.0889, "step": 3028 }, { "epoch": 0.24842428900845503, "grad_norm": 0.5294365286827087, "learning_rate": 8.96260226101693e-05, "loss": 3.0869, "step": 3030 }, { "epoch": 0.24858826543684345, "grad_norm": 0.513014554977417, "learning_rate": 8.960948199926048e-05, "loss": 3.1187, "step": 3032 }, { "epoch": 0.24875224186523187, "grad_norm": 0.47102218866348267, "learning_rate": 8.95929297412766e-05, "loss": 3.0829, "step": 3034 }, { "epoch": 0.2489162182936203, "grad_norm": 0.47533565759658813, "learning_rate": 8.957636584108476e-05, "loss": 3.0324, "step": 3036 }, { "epoch": 0.2490801947220087, "grad_norm": 0.5862759351730347, "learning_rate": 8.955979030355559e-05, "loss": 3.1245, "step": 3038 }, { "epoch": 0.24924417115039713, "grad_norm": 0.5523611307144165, "learning_rate": 8.954320313356306e-05, "loss": 3.0173, "step": 3040 }, { "epoch": 0.24940814757878554, "grad_norm": 0.5045341849327087, "learning_rate": 8.952660433598459e-05, "loss": 3.0735, "step": 3042 }, { "epoch": 0.24957212400717396, "grad_norm": 0.5677223205566406, "learning_rate": 8.950999391570103e-05, "loss": 3.0802, "step": 3044 }, { "epoch": 0.24973610043556238, "grad_norm": 0.5546299815177917, "learning_rate": 8.949337187759663e-05, "loss": 3.0347, "step": 3046 }, { "epoch": 0.2499000768639508, "grad_norm": 0.5589927434921265, "learning_rate": 8.947673822655906e-05, "loss": 3.1115, "step": 3048 }, { "epoch": 0.2500640532923392, "grad_norm": 0.5995765328407288, "learning_rate": 8.946009296747942e-05, "loss": 3.0568, "step": 3050 }, { "epoch": 0.25022802972072766, "grad_norm": 0.5527830719947815, "learning_rate": 8.944343610525216e-05, "loss": 3.0868, "step": 3052 }, { "epoch": 0.25039200614911605, "grad_norm": 0.4991224408149719, "learning_rate": 8.942676764477524e-05, "loss": 3.0589, "step": 3054 }, { "epoch": 0.2505559825775045, "grad_norm": 0.5033073425292969, "learning_rate": 8.941008759094998e-05, "loss": 3.0778, "step": 3056 }, { "epoch": 0.2507199590058929, "grad_norm": 0.49312669038772583, "learning_rate": 8.939339594868109e-05, "loss": 3.0657, "step": 3058 }, { "epoch": 0.25088393543428134, "grad_norm": 0.49565747380256653, "learning_rate": 8.937669272287672e-05, "loss": 3.0625, "step": 3060 }, { "epoch": 0.2510479118626697, "grad_norm": 0.5610074996948242, "learning_rate": 8.935997791844842e-05, "loss": 3.0784, "step": 3062 }, { "epoch": 0.25121188829105817, "grad_norm": 0.5718916058540344, "learning_rate": 8.93432515403111e-05, "loss": 3.0565, "step": 3064 }, { "epoch": 0.25137586471944656, "grad_norm": 0.5423455238342285, "learning_rate": 8.932651359338316e-05, "loss": 3.0458, "step": 3066 }, { "epoch": 0.251539841147835, "grad_norm": 0.5095962285995483, "learning_rate": 8.930976408258633e-05, "loss": 3.0652, "step": 3068 }, { "epoch": 0.2517038175762234, "grad_norm": 0.522039532661438, "learning_rate": 8.929300301284578e-05, "loss": 3.0416, "step": 3070 }, { "epoch": 0.25186779400461184, "grad_norm": 0.5137038230895996, "learning_rate": 8.927623038909004e-05, "loss": 3.0422, "step": 3072 }, { "epoch": 0.25203177043300024, "grad_norm": 0.4940985441207886, "learning_rate": 8.925944621625109e-05, "loss": 3.0576, "step": 3074 }, { "epoch": 0.2521957468613887, "grad_norm": 0.492217093706131, "learning_rate": 8.924265049926423e-05, "loss": 3.0481, "step": 3076 }, { "epoch": 0.25235972328977707, "grad_norm": 0.5192411541938782, "learning_rate": 8.922584324306827e-05, "loss": 3.087, "step": 3078 }, { "epoch": 0.2525236997181655, "grad_norm": 0.47307392954826355, "learning_rate": 8.920902445260528e-05, "loss": 3.0104, "step": 3080 }, { "epoch": 0.2526876761465539, "grad_norm": 0.4889049828052521, "learning_rate": 8.919219413282083e-05, "loss": 3.0765, "step": 3082 }, { "epoch": 0.25285165257494235, "grad_norm": 0.4941463768482208, "learning_rate": 8.917535228866379e-05, "loss": 3.0438, "step": 3084 }, { "epoch": 0.25301562900333074, "grad_norm": 0.49077731370925903, "learning_rate": 8.915849892508652e-05, "loss": 3.0448, "step": 3086 }, { "epoch": 0.2531796054317192, "grad_norm": 0.5119606256484985, "learning_rate": 8.914163404704466e-05, "loss": 3.0407, "step": 3088 }, { "epoch": 0.25334358186010764, "grad_norm": 0.5803630352020264, "learning_rate": 8.912475765949733e-05, "loss": 3.072, "step": 3090 }, { "epoch": 0.253507558288496, "grad_norm": 0.5256679654121399, "learning_rate": 8.910786976740697e-05, "loss": 3.1345, "step": 3092 }, { "epoch": 0.2536715347168845, "grad_norm": 0.5043202042579651, "learning_rate": 8.909097037573941e-05, "loss": 3.0407, "step": 3094 }, { "epoch": 0.25383551114527286, "grad_norm": 0.536882221698761, "learning_rate": 8.907405948946393e-05, "loss": 3.0808, "step": 3096 }, { "epoch": 0.2539994875736613, "grad_norm": 0.5040213465690613, "learning_rate": 8.905713711355308e-05, "loss": 3.0875, "step": 3098 }, { "epoch": 0.2541634640020497, "grad_norm": 0.5036368370056152, "learning_rate": 8.904020325298286e-05, "loss": 3.0399, "step": 3100 }, { "epoch": 0.25432744043043815, "grad_norm": 0.5161576271057129, "learning_rate": 8.902325791273265e-05, "loss": 3.0826, "step": 3102 }, { "epoch": 0.25449141685882654, "grad_norm": 0.49616822600364685, "learning_rate": 8.900630109778517e-05, "loss": 3.0633, "step": 3104 }, { "epoch": 0.254655393287215, "grad_norm": 0.4793374240398407, "learning_rate": 8.898933281312653e-05, "loss": 3.081, "step": 3106 }, { "epoch": 0.25481936971560337, "grad_norm": 0.46786659955978394, "learning_rate": 8.897235306374625e-05, "loss": 3.0785, "step": 3108 }, { "epoch": 0.2549833461439918, "grad_norm": 0.45357832312583923, "learning_rate": 8.895536185463713e-05, "loss": 3.0654, "step": 3110 }, { "epoch": 0.2551473225723802, "grad_norm": 0.5247970223426819, "learning_rate": 8.893835919079543e-05, "loss": 3.1274, "step": 3112 }, { "epoch": 0.25531129900076865, "grad_norm": 0.5221189260482788, "learning_rate": 8.892134507722074e-05, "loss": 3.0446, "step": 3114 }, { "epoch": 0.25547527542915704, "grad_norm": 0.518396258354187, "learning_rate": 8.8904319518916e-05, "loss": 3.1165, "step": 3116 }, { "epoch": 0.2556392518575455, "grad_norm": 0.4879421591758728, "learning_rate": 8.888728252088758e-05, "loss": 3.0649, "step": 3118 }, { "epoch": 0.2558032282859339, "grad_norm": 0.47542670369148254, "learning_rate": 8.887023408814512e-05, "loss": 3.0965, "step": 3120 }, { "epoch": 0.2559672047143223, "grad_norm": 0.5255916714668274, "learning_rate": 8.88531742257017e-05, "loss": 3.0032, "step": 3122 }, { "epoch": 0.2561311811427107, "grad_norm": 0.5671265125274658, "learning_rate": 8.883610293857371e-05, "loss": 3.0821, "step": 3124 }, { "epoch": 0.25629515757109916, "grad_norm": 0.5040590167045593, "learning_rate": 8.881902023178094e-05, "loss": 3.0581, "step": 3126 }, { "epoch": 0.25645913399948755, "grad_norm": 0.5780110359191895, "learning_rate": 8.880192611034652e-05, "loss": 3.0678, "step": 3128 }, { "epoch": 0.256623110427876, "grad_norm": 0.5004504323005676, "learning_rate": 8.878482057929693e-05, "loss": 3.0434, "step": 3130 }, { "epoch": 0.2567870868562644, "grad_norm": 0.5049440860748291, "learning_rate": 8.876770364366201e-05, "loss": 3.0637, "step": 3132 }, { "epoch": 0.25695106328465284, "grad_norm": 0.51917964220047, "learning_rate": 8.875057530847497e-05, "loss": 3.0346, "step": 3134 }, { "epoch": 0.2571150397130412, "grad_norm": 0.5143879055976868, "learning_rate": 8.873343557877234e-05, "loss": 3.016, "step": 3136 }, { "epoch": 0.2572790161414297, "grad_norm": 0.4929054081439972, "learning_rate": 8.871628445959402e-05, "loss": 3.0746, "step": 3138 }, { "epoch": 0.25744299256981806, "grad_norm": 0.4992254972457886, "learning_rate": 8.869912195598326e-05, "loss": 3.0855, "step": 3140 }, { "epoch": 0.2576069689982065, "grad_norm": 0.5007113814353943, "learning_rate": 8.868194807298664e-05, "loss": 2.9942, "step": 3142 }, { "epoch": 0.2577709454265949, "grad_norm": 0.5016566514968872, "learning_rate": 8.866476281565413e-05, "loss": 3.0523, "step": 3144 }, { "epoch": 0.25793492185498335, "grad_norm": 0.5850445032119751, "learning_rate": 8.864756618903898e-05, "loss": 3.0497, "step": 3146 }, { "epoch": 0.2580988982833718, "grad_norm": 0.5336945056915283, "learning_rate": 8.863035819819784e-05, "loss": 3.0543, "step": 3148 }, { "epoch": 0.2582628747117602, "grad_norm": 0.5204024314880371, "learning_rate": 8.861313884819066e-05, "loss": 3.0131, "step": 3150 }, { "epoch": 0.25842685114014863, "grad_norm": 0.49752482771873474, "learning_rate": 8.859590814408078e-05, "loss": 3.0386, "step": 3152 }, { "epoch": 0.258590827568537, "grad_norm": 0.5874549746513367, "learning_rate": 8.857866609093484e-05, "loss": 3.1003, "step": 3154 }, { "epoch": 0.25875480399692546, "grad_norm": 0.5115360617637634, "learning_rate": 8.856141269382281e-05, "loss": 2.9792, "step": 3156 }, { "epoch": 0.25891878042531385, "grad_norm": 0.5269978046417236, "learning_rate": 8.8544147957818e-05, "loss": 3.0692, "step": 3158 }, { "epoch": 0.2590827568537023, "grad_norm": 0.5046514868736267, "learning_rate": 8.852687188799709e-05, "loss": 3.0371, "step": 3160 }, { "epoch": 0.2592467332820907, "grad_norm": 0.570195198059082, "learning_rate": 8.850958448944007e-05, "loss": 3.0922, "step": 3162 }, { "epoch": 0.25941070971047914, "grad_norm": 0.5811053514480591, "learning_rate": 8.849228576723024e-05, "loss": 3.0337, "step": 3164 }, { "epoch": 0.2595746861388675, "grad_norm": 0.4887376129627228, "learning_rate": 8.847497572645424e-05, "loss": 3.0332, "step": 3166 }, { "epoch": 0.259738662567256, "grad_norm": 0.5527136325836182, "learning_rate": 8.845765437220209e-05, "loss": 3.029, "step": 3168 }, { "epoch": 0.25990263899564436, "grad_norm": 0.5504634380340576, "learning_rate": 8.844032170956707e-05, "loss": 3.0602, "step": 3170 }, { "epoch": 0.2600666154240328, "grad_norm": 0.48267602920532227, "learning_rate": 8.842297774364579e-05, "loss": 3.0176, "step": 3172 }, { "epoch": 0.2602305918524212, "grad_norm": 0.5267589092254639, "learning_rate": 8.840562247953822e-05, "loss": 3.082, "step": 3174 }, { "epoch": 0.26039456828080965, "grad_norm": 0.48394930362701416, "learning_rate": 8.838825592234763e-05, "loss": 3.0773, "step": 3176 }, { "epoch": 0.26055854470919804, "grad_norm": 0.5486636757850647, "learning_rate": 8.837087807718062e-05, "loss": 3.0657, "step": 3178 }, { "epoch": 0.2607225211375865, "grad_norm": 0.6029240489006042, "learning_rate": 8.835348894914712e-05, "loss": 3.0705, "step": 3180 }, { "epoch": 0.2608864975659749, "grad_norm": 0.5119585394859314, "learning_rate": 8.833608854336032e-05, "loss": 3.0849, "step": 3182 }, { "epoch": 0.2610504739943633, "grad_norm": 0.5734114050865173, "learning_rate": 8.831867686493682e-05, "loss": 3.093, "step": 3184 }, { "epoch": 0.2612144504227517, "grad_norm": 0.5184375047683716, "learning_rate": 8.830125391899645e-05, "loss": 3.0587, "step": 3186 }, { "epoch": 0.26137842685114016, "grad_norm": 0.5614577531814575, "learning_rate": 8.828381971066238e-05, "loss": 3.022, "step": 3188 }, { "epoch": 0.26154240327952855, "grad_norm": 0.4999874532222748, "learning_rate": 8.82663742450611e-05, "loss": 3.0534, "step": 3190 }, { "epoch": 0.261706379707917, "grad_norm": 0.5082796812057495, "learning_rate": 8.824891752732246e-05, "loss": 3.0002, "step": 3192 }, { "epoch": 0.2618703561363054, "grad_norm": 0.4463191628456116, "learning_rate": 8.82314495625795e-05, "loss": 3.1116, "step": 3194 }, { "epoch": 0.26203433256469383, "grad_norm": 0.49782219529151917, "learning_rate": 8.821397035596865e-05, "loss": 3.0144, "step": 3196 }, { "epoch": 0.2621983089930822, "grad_norm": 0.4964871406555176, "learning_rate": 8.819647991262965e-05, "loss": 3.0592, "step": 3198 }, { "epoch": 0.26236228542147066, "grad_norm": 0.5055323839187622, "learning_rate": 8.817897823770552e-05, "loss": 3.0538, "step": 3200 }, { "epoch": 0.2625262618498591, "grad_norm": 0.5108357071876526, "learning_rate": 8.816146533634258e-05, "loss": 3.0168, "step": 3202 }, { "epoch": 0.2626902382782475, "grad_norm": 0.5637660026550293, "learning_rate": 8.814394121369044e-05, "loss": 3.0205, "step": 3204 }, { "epoch": 0.26285421470663595, "grad_norm": 0.5199828147888184, "learning_rate": 8.812640587490206e-05, "loss": 3.0393, "step": 3206 }, { "epoch": 0.26301819113502434, "grad_norm": 0.518085241317749, "learning_rate": 8.810885932513364e-05, "loss": 3.0558, "step": 3208 }, { "epoch": 0.2631821675634128, "grad_norm": 0.5022614598274231, "learning_rate": 8.809130156954472e-05, "loss": 3.0701, "step": 3210 }, { "epoch": 0.2633461439918012, "grad_norm": 0.5334145426750183, "learning_rate": 8.807373261329809e-05, "loss": 3.0292, "step": 3212 }, { "epoch": 0.2635101204201896, "grad_norm": 0.5729058980941772, "learning_rate": 8.80561524615599e-05, "loss": 3.0583, "step": 3214 }, { "epoch": 0.263674096848578, "grad_norm": 0.5852642059326172, "learning_rate": 8.803856111949952e-05, "loss": 3.0467, "step": 3216 }, { "epoch": 0.26383807327696646, "grad_norm": 0.5108850598335266, "learning_rate": 8.802095859228965e-05, "loss": 2.9989, "step": 3218 }, { "epoch": 0.26400204970535485, "grad_norm": 0.532211184501648, "learning_rate": 8.800334488510629e-05, "loss": 3.0702, "step": 3220 }, { "epoch": 0.2641660261337433, "grad_norm": 0.5329665541648865, "learning_rate": 8.798572000312868e-05, "loss": 3.0789, "step": 3222 }, { "epoch": 0.2643300025621317, "grad_norm": 0.5084935426712036, "learning_rate": 8.796808395153939e-05, "loss": 2.9789, "step": 3224 }, { "epoch": 0.26449397899052013, "grad_norm": 0.6284180879592896, "learning_rate": 8.795043673552426e-05, "loss": 3.0542, "step": 3226 }, { "epoch": 0.2646579554189085, "grad_norm": 0.491621732711792, "learning_rate": 8.793277836027243e-05, "loss": 3.0203, "step": 3228 }, { "epoch": 0.26482193184729697, "grad_norm": 0.5157349705696106, "learning_rate": 8.791510883097627e-05, "loss": 2.9835, "step": 3230 }, { "epoch": 0.26498590827568536, "grad_norm": 0.5752792954444885, "learning_rate": 8.789742815283147e-05, "loss": 3.0707, "step": 3232 }, { "epoch": 0.2651498847040738, "grad_norm": 0.5733649730682373, "learning_rate": 8.787973633103701e-05, "loss": 2.9905, "step": 3234 }, { "epoch": 0.2653138611324622, "grad_norm": 0.5354986786842346, "learning_rate": 8.786203337079512e-05, "loss": 3.0373, "step": 3236 }, { "epoch": 0.26547783756085064, "grad_norm": 0.590827226638794, "learning_rate": 8.78443192773113e-05, "loss": 3.1053, "step": 3238 }, { "epoch": 0.26564181398923903, "grad_norm": 0.5951526165008545, "learning_rate": 8.782659405579437e-05, "loss": 3.0664, "step": 3240 }, { "epoch": 0.2658057904176275, "grad_norm": 0.5225715637207031, "learning_rate": 8.780885771145635e-05, "loss": 3.0216, "step": 3242 }, { "epoch": 0.26596976684601586, "grad_norm": 0.5698503851890564, "learning_rate": 8.77911102495126e-05, "loss": 3.0631, "step": 3244 }, { "epoch": 0.2661337432744043, "grad_norm": 0.4748436212539673, "learning_rate": 8.777335167518172e-05, "loss": 3.0373, "step": 3246 }, { "epoch": 0.2662977197027927, "grad_norm": 0.5692675709724426, "learning_rate": 8.775558199368556e-05, "loss": 3.0603, "step": 3248 }, { "epoch": 0.26646169613118115, "grad_norm": 0.5429494380950928, "learning_rate": 8.773780121024925e-05, "loss": 3.0812, "step": 3250 }, { "epoch": 0.26662567255956954, "grad_norm": 0.5639028549194336, "learning_rate": 8.77200093301012e-05, "loss": 3.0548, "step": 3252 }, { "epoch": 0.266789648987958, "grad_norm": 0.4472223222255707, "learning_rate": 8.770220635847308e-05, "loss": 3.0257, "step": 3254 }, { "epoch": 0.2669536254163464, "grad_norm": 0.5361818671226501, "learning_rate": 8.76843923005998e-05, "loss": 3.0535, "step": 3256 }, { "epoch": 0.2671176018447348, "grad_norm": 0.49390971660614014, "learning_rate": 8.766656716171952e-05, "loss": 3.0318, "step": 3258 }, { "epoch": 0.26728157827312327, "grad_norm": 0.47352907061576843, "learning_rate": 8.764873094707371e-05, "loss": 3.024, "step": 3260 }, { "epoch": 0.26744555470151166, "grad_norm": 0.5356292128562927, "learning_rate": 8.763088366190709e-05, "loss": 3.0469, "step": 3262 }, { "epoch": 0.2676095311299001, "grad_norm": 0.5446259379386902, "learning_rate": 8.761302531146754e-05, "loss": 3.0701, "step": 3264 }, { "epoch": 0.2677735075582885, "grad_norm": 0.481741338968277, "learning_rate": 8.759515590100633e-05, "loss": 3.0365, "step": 3266 }, { "epoch": 0.26793748398667694, "grad_norm": 0.4720764756202698, "learning_rate": 8.75772754357779e-05, "loss": 3.0117, "step": 3268 }, { "epoch": 0.26810146041506533, "grad_norm": 0.4751954674720764, "learning_rate": 8.755938392103993e-05, "loss": 3.0229, "step": 3270 }, { "epoch": 0.2682654368434538, "grad_norm": 0.5223908424377441, "learning_rate": 8.754148136205343e-05, "loss": 3.0563, "step": 3272 }, { "epoch": 0.26842941327184217, "grad_norm": 0.5289605855941772, "learning_rate": 8.752356776408254e-05, "loss": 3.0188, "step": 3274 }, { "epoch": 0.2685933897002306, "grad_norm": 0.4922078549861908, "learning_rate": 8.750564313239478e-05, "loss": 3.0092, "step": 3276 }, { "epoch": 0.268757366128619, "grad_norm": 0.5029440522193909, "learning_rate": 8.74877074722608e-05, "loss": 3.0981, "step": 3278 }, { "epoch": 0.26892134255700745, "grad_norm": 0.5761165618896484, "learning_rate": 8.746976078895457e-05, "loss": 3.0617, "step": 3280 }, { "epoch": 0.26908531898539584, "grad_norm": 0.5281015634536743, "learning_rate": 8.745180308775325e-05, "loss": 3.0612, "step": 3282 }, { "epoch": 0.2692492954137843, "grad_norm": 0.4676494896411896, "learning_rate": 8.743383437393725e-05, "loss": 3.0523, "step": 3284 }, { "epoch": 0.2694132718421727, "grad_norm": 0.4948801100254059, "learning_rate": 8.741585465279025e-05, "loss": 3.037, "step": 3286 }, { "epoch": 0.2695772482705611, "grad_norm": 0.5158532857894897, "learning_rate": 8.739786392959914e-05, "loss": 3.0052, "step": 3288 }, { "epoch": 0.2697412246989495, "grad_norm": 0.4951574504375458, "learning_rate": 8.737986220965403e-05, "loss": 3.0039, "step": 3290 }, { "epoch": 0.26990520112733796, "grad_norm": 0.4828198254108429, "learning_rate": 8.736184949824832e-05, "loss": 3.046, "step": 3292 }, { "epoch": 0.27006917755572635, "grad_norm": 0.5514748096466064, "learning_rate": 8.734382580067856e-05, "loss": 3.0188, "step": 3294 }, { "epoch": 0.2702331539841148, "grad_norm": 0.5289325714111328, "learning_rate": 8.732579112224464e-05, "loss": 2.9917, "step": 3296 }, { "epoch": 0.2703971304125032, "grad_norm": 0.4985904395580292, "learning_rate": 8.730774546824953e-05, "loss": 3.0484, "step": 3298 }, { "epoch": 0.27056110684089163, "grad_norm": 0.5238404870033264, "learning_rate": 8.728968884399959e-05, "loss": 3.0856, "step": 3300 }, { "epoch": 0.27072508326928, "grad_norm": 0.5596960186958313, "learning_rate": 8.727162125480429e-05, "loss": 3.0124, "step": 3302 }, { "epoch": 0.27088905969766847, "grad_norm": 0.5070179104804993, "learning_rate": 8.725354270597636e-05, "loss": 3.0299, "step": 3304 }, { "epoch": 0.27105303612605686, "grad_norm": 0.4900921583175659, "learning_rate": 8.723545320283178e-05, "loss": 3.0523, "step": 3306 }, { "epoch": 0.2712170125544453, "grad_norm": 0.5997633934020996, "learning_rate": 8.721735275068968e-05, "loss": 3.111, "step": 3308 }, { "epoch": 0.2713809889828337, "grad_norm": 0.6209022998809814, "learning_rate": 8.719924135487249e-05, "loss": 3.0585, "step": 3310 }, { "epoch": 0.27154496541122214, "grad_norm": 0.5038576722145081, "learning_rate": 8.718111902070583e-05, "loss": 3.0472, "step": 3312 }, { "epoch": 0.27170894183961053, "grad_norm": 0.5371909141540527, "learning_rate": 8.716298575351852e-05, "loss": 2.9892, "step": 3314 }, { "epoch": 0.271872918267999, "grad_norm": 0.5439016222953796, "learning_rate": 8.714484155864257e-05, "loss": 3.025, "step": 3316 }, { "epoch": 0.2720368946963874, "grad_norm": 0.6247979998588562, "learning_rate": 8.71266864414133e-05, "loss": 3.0627, "step": 3318 }, { "epoch": 0.2722008711247758, "grad_norm": 0.6789233684539795, "learning_rate": 8.710852040716915e-05, "loss": 3.0713, "step": 3320 }, { "epoch": 0.27236484755316426, "grad_norm": 0.6147254705429077, "learning_rate": 8.709034346125178e-05, "loss": 3.0732, "step": 3322 }, { "epoch": 0.27252882398155265, "grad_norm": 0.5249021649360657, "learning_rate": 8.707215560900612e-05, "loss": 3.0765, "step": 3324 }, { "epoch": 0.2726928004099411, "grad_norm": 0.5897969603538513, "learning_rate": 8.705395685578022e-05, "loss": 3.0331, "step": 3326 }, { "epoch": 0.2728567768383295, "grad_norm": 0.5916758179664612, "learning_rate": 8.703574720692541e-05, "loss": 3.0078, "step": 3328 }, { "epoch": 0.27302075326671793, "grad_norm": 0.48185595870018005, "learning_rate": 8.701752666779619e-05, "loss": 3.0312, "step": 3330 }, { "epoch": 0.2731847296951063, "grad_norm": 0.5248916745185852, "learning_rate": 8.699929524375025e-05, "loss": 3.0062, "step": 3332 }, { "epoch": 0.27334870612349477, "grad_norm": 0.5910229086875916, "learning_rate": 8.698105294014853e-05, "loss": 3.0185, "step": 3334 }, { "epoch": 0.27351268255188316, "grad_norm": 0.5539065003395081, "learning_rate": 8.696279976235512e-05, "loss": 3.0376, "step": 3336 }, { "epoch": 0.2736766589802716, "grad_norm": 0.5597767233848572, "learning_rate": 8.694453571573731e-05, "loss": 2.9856, "step": 3338 }, { "epoch": 0.27384063540866, "grad_norm": 0.4913848638534546, "learning_rate": 8.692626080566561e-05, "loss": 3.0781, "step": 3340 }, { "epoch": 0.27400461183704844, "grad_norm": 0.5226843357086182, "learning_rate": 8.690797503751373e-05, "loss": 3.05, "step": 3342 }, { "epoch": 0.27416858826543683, "grad_norm": 0.5141190886497498, "learning_rate": 8.688967841665853e-05, "loss": 3.0245, "step": 3344 }, { "epoch": 0.2743325646938253, "grad_norm": 0.5239917635917664, "learning_rate": 8.68713709484801e-05, "loss": 3.0523, "step": 3346 }, { "epoch": 0.27449654112221367, "grad_norm": 0.533658504486084, "learning_rate": 8.685305263836172e-05, "loss": 3.0296, "step": 3348 }, { "epoch": 0.2746605175506021, "grad_norm": 0.5123451948165894, "learning_rate": 8.683472349168982e-05, "loss": 3.0271, "step": 3350 }, { "epoch": 0.2748244939789905, "grad_norm": 0.5194646120071411, "learning_rate": 8.681638351385407e-05, "loss": 3.0006, "step": 3352 }, { "epoch": 0.27498847040737895, "grad_norm": 0.5386354923248291, "learning_rate": 8.679803271024729e-05, "loss": 3.0237, "step": 3354 }, { "epoch": 0.27515244683576734, "grad_norm": 0.5998788475990295, "learning_rate": 8.677967108626547e-05, "loss": 3.0206, "step": 3356 }, { "epoch": 0.2753164232641558, "grad_norm": 0.576684296131134, "learning_rate": 8.676129864730784e-05, "loss": 3.036, "step": 3358 }, { "epoch": 0.2754803996925442, "grad_norm": 0.5040565729141235, "learning_rate": 8.674291539877674e-05, "loss": 3.0111, "step": 3360 }, { "epoch": 0.2756443761209326, "grad_norm": 0.4546044170856476, "learning_rate": 8.672452134607772e-05, "loss": 3.0263, "step": 3362 }, { "epoch": 0.275808352549321, "grad_norm": 0.4776202440261841, "learning_rate": 8.670611649461953e-05, "loss": 3.0355, "step": 3364 }, { "epoch": 0.27597232897770946, "grad_norm": 0.5171165466308594, "learning_rate": 8.668770084981408e-05, "loss": 3.0517, "step": 3366 }, { "epoch": 0.27613630540609785, "grad_norm": 0.613608181476593, "learning_rate": 8.666927441707639e-05, "loss": 3.0085, "step": 3368 }, { "epoch": 0.2763002818344863, "grad_norm": 0.5442901849746704, "learning_rate": 8.665083720182479e-05, "loss": 3.0408, "step": 3370 }, { "epoch": 0.2764642582628747, "grad_norm": 0.4858104884624481, "learning_rate": 8.663238920948065e-05, "loss": 3.0797, "step": 3372 }, { "epoch": 0.27662823469126313, "grad_norm": 0.599068284034729, "learning_rate": 8.661393044546855e-05, "loss": 3.0034, "step": 3374 }, { "epoch": 0.2767922111196516, "grad_norm": 0.6478520631790161, "learning_rate": 8.659546091521628e-05, "loss": 3.0538, "step": 3376 }, { "epoch": 0.27695618754803997, "grad_norm": 0.5216221809387207, "learning_rate": 8.657698062415473e-05, "loss": 2.9927, "step": 3378 }, { "epoch": 0.2771201639764284, "grad_norm": 0.49026909470558167, "learning_rate": 8.655848957771801e-05, "loss": 2.9977, "step": 3380 }, { "epoch": 0.2772841404048168, "grad_norm": 0.5494085550308228, "learning_rate": 8.653998778134338e-05, "loss": 3.0324, "step": 3382 }, { "epoch": 0.27744811683320525, "grad_norm": 0.4823310375213623, "learning_rate": 8.652147524047121e-05, "loss": 3.0145, "step": 3384 }, { "epoch": 0.27761209326159364, "grad_norm": 0.5301584005355835, "learning_rate": 8.65029519605451e-05, "loss": 3.073, "step": 3386 }, { "epoch": 0.2777760696899821, "grad_norm": 0.5409163236618042, "learning_rate": 8.648441794701176e-05, "loss": 3.0718, "step": 3388 }, { "epoch": 0.2779400461183705, "grad_norm": 0.542854905128479, "learning_rate": 8.646587320532109e-05, "loss": 3.0202, "step": 3390 }, { "epoch": 0.2781040225467589, "grad_norm": 0.5100975036621094, "learning_rate": 8.644731774092611e-05, "loss": 3.0215, "step": 3392 }, { "epoch": 0.2782679989751473, "grad_norm": 0.4922786355018616, "learning_rate": 8.642875155928302e-05, "loss": 2.9681, "step": 3394 }, { "epoch": 0.27843197540353576, "grad_norm": 0.4704815149307251, "learning_rate": 8.641017466585115e-05, "loss": 2.9829, "step": 3396 }, { "epoch": 0.27859595183192415, "grad_norm": 0.4583336114883423, "learning_rate": 8.639158706609301e-05, "loss": 2.9813, "step": 3398 }, { "epoch": 0.2787599282603126, "grad_norm": 0.46999305486679077, "learning_rate": 8.637298876547423e-05, "loss": 3.0119, "step": 3400 }, { "epoch": 0.278923904688701, "grad_norm": 0.4925120770931244, "learning_rate": 8.63543797694636e-05, "loss": 3.0416, "step": 3402 }, { "epoch": 0.27908788111708943, "grad_norm": 0.44865623116493225, "learning_rate": 8.633576008353307e-05, "loss": 3.0126, "step": 3404 }, { "epoch": 0.2792518575454778, "grad_norm": 0.4903569519519806, "learning_rate": 8.631712971315769e-05, "loss": 2.993, "step": 3406 }, { "epoch": 0.27941583397386627, "grad_norm": 0.5053166747093201, "learning_rate": 8.629848866381566e-05, "loss": 3.0154, "step": 3408 }, { "epoch": 0.27957981040225466, "grad_norm": 0.5207493305206299, "learning_rate": 8.627983694098836e-05, "loss": 3.0152, "step": 3410 }, { "epoch": 0.2797437868306431, "grad_norm": 0.4896704852581024, "learning_rate": 8.626117455016029e-05, "loss": 3.0206, "step": 3412 }, { "epoch": 0.2799077632590315, "grad_norm": 0.43985670804977417, "learning_rate": 8.62425014968191e-05, "loss": 3.0113, "step": 3414 }, { "epoch": 0.28007173968741994, "grad_norm": 0.4357748031616211, "learning_rate": 8.62238177864555e-05, "loss": 2.9796, "step": 3416 }, { "epoch": 0.28023571611580833, "grad_norm": 0.5827761888504028, "learning_rate": 8.620512342456344e-05, "loss": 3.0356, "step": 3418 }, { "epoch": 0.2803996925441968, "grad_norm": 0.5295268297195435, "learning_rate": 8.618641841663995e-05, "loss": 3.0138, "step": 3420 }, { "epoch": 0.28056366897258517, "grad_norm": 0.5759023427963257, "learning_rate": 8.616770276818515e-05, "loss": 3.0662, "step": 3422 }, { "epoch": 0.2807276454009736, "grad_norm": 0.5547720789909363, "learning_rate": 8.61489764847024e-05, "loss": 2.9373, "step": 3424 }, { "epoch": 0.280891621829362, "grad_norm": 0.5790712833404541, "learning_rate": 8.613023957169805e-05, "loss": 2.9677, "step": 3426 }, { "epoch": 0.28105559825775045, "grad_norm": 0.5202155709266663, "learning_rate": 8.611149203468169e-05, "loss": 2.9925, "step": 3428 }, { "epoch": 0.28121957468613884, "grad_norm": 0.48871245980262756, "learning_rate": 8.609273387916599e-05, "loss": 3.0403, "step": 3430 }, { "epoch": 0.2813835511145273, "grad_norm": 0.5515206456184387, "learning_rate": 8.607396511066672e-05, "loss": 3.0231, "step": 3432 }, { "epoch": 0.28154752754291573, "grad_norm": 0.5459672212600708, "learning_rate": 8.605518573470281e-05, "loss": 2.9938, "step": 3434 }, { "epoch": 0.2817115039713041, "grad_norm": 0.5675147771835327, "learning_rate": 8.603639575679627e-05, "loss": 3.004, "step": 3436 }, { "epoch": 0.28187548039969257, "grad_norm": 0.5069471597671509, "learning_rate": 8.601759518247228e-05, "loss": 3.0038, "step": 3438 }, { "epoch": 0.28203945682808096, "grad_norm": 0.4854438900947571, "learning_rate": 8.599878401725907e-05, "loss": 3.0183, "step": 3440 }, { "epoch": 0.2822034332564694, "grad_norm": 0.50712651014328, "learning_rate": 8.597996226668803e-05, "loss": 3.0431, "step": 3442 }, { "epoch": 0.2823674096848578, "grad_norm": 0.5265794396400452, "learning_rate": 8.596112993629368e-05, "loss": 3.025, "step": 3444 }, { "epoch": 0.28253138611324624, "grad_norm": 0.5010355710983276, "learning_rate": 8.594228703161358e-05, "loss": 3.0151, "step": 3446 }, { "epoch": 0.28269536254163463, "grad_norm": 0.5020803809165955, "learning_rate": 8.592343355818848e-05, "loss": 3.0397, "step": 3448 }, { "epoch": 0.2828593389700231, "grad_norm": 0.5143887996673584, "learning_rate": 8.590456952156216e-05, "loss": 3.0433, "step": 3450 }, { "epoch": 0.28302331539841147, "grad_norm": 0.46669018268585205, "learning_rate": 8.588569492728158e-05, "loss": 2.9783, "step": 3452 }, { "epoch": 0.2831872918267999, "grad_norm": 0.5204837322235107, "learning_rate": 8.586680978089675e-05, "loss": 3.0218, "step": 3454 }, { "epoch": 0.2833512682551883, "grad_norm": 0.5977779030799866, "learning_rate": 8.584791408796081e-05, "loss": 2.9815, "step": 3456 }, { "epoch": 0.28351524468357675, "grad_norm": 0.4907354414463043, "learning_rate": 8.582900785403e-05, "loss": 3.0183, "step": 3458 }, { "epoch": 0.28367922111196514, "grad_norm": 0.4910911023616791, "learning_rate": 8.581009108466365e-05, "loss": 2.9656, "step": 3460 }, { "epoch": 0.2838431975403536, "grad_norm": 0.4608771502971649, "learning_rate": 8.579116378542418e-05, "loss": 3.0116, "step": 3462 }, { "epoch": 0.284007173968742, "grad_norm": 0.5098771452903748, "learning_rate": 8.577222596187713e-05, "loss": 2.9916, "step": 3464 }, { "epoch": 0.2841711503971304, "grad_norm": 0.4630301296710968, "learning_rate": 8.575327761959111e-05, "loss": 3.014, "step": 3466 }, { "epoch": 0.2843351268255188, "grad_norm": 0.4914288818836212, "learning_rate": 8.573431876413786e-05, "loss": 3.0314, "step": 3468 }, { "epoch": 0.28449910325390726, "grad_norm": 0.5059999227523804, "learning_rate": 8.571534940109215e-05, "loss": 2.9912, "step": 3470 }, { "epoch": 0.28466307968229565, "grad_norm": 0.48450663685798645, "learning_rate": 8.569636953603193e-05, "loss": 3.0338, "step": 3472 }, { "epoch": 0.2848270561106841, "grad_norm": 0.5315168499946594, "learning_rate": 8.567737917453814e-05, "loss": 3.0025, "step": 3474 }, { "epoch": 0.2849910325390725, "grad_norm": 0.6352331042289734, "learning_rate": 8.565837832219486e-05, "loss": 3.0111, "step": 3476 }, { "epoch": 0.28515500896746093, "grad_norm": 0.5802767276763916, "learning_rate": 8.563936698458924e-05, "loss": 2.9683, "step": 3478 }, { "epoch": 0.2853189853958493, "grad_norm": 0.49376073479652405, "learning_rate": 8.562034516731155e-05, "loss": 3.0061, "step": 3480 }, { "epoch": 0.28548296182423777, "grad_norm": 0.6267409920692444, "learning_rate": 8.560131287595508e-05, "loss": 3.0239, "step": 3482 }, { "epoch": 0.28564693825262616, "grad_norm": 0.4985465407371521, "learning_rate": 8.558227011611624e-05, "loss": 2.9816, "step": 3484 }, { "epoch": 0.2858109146810146, "grad_norm": 0.5097251534461975, "learning_rate": 8.55632168933945e-05, "loss": 3.0022, "step": 3486 }, { "epoch": 0.28597489110940305, "grad_norm": 0.4792355000972748, "learning_rate": 8.554415321339245e-05, "loss": 2.9994, "step": 3488 }, { "epoch": 0.28613886753779144, "grad_norm": 0.4744062125682831, "learning_rate": 8.552507908171567e-05, "loss": 3.0114, "step": 3490 }, { "epoch": 0.2863028439661799, "grad_norm": 0.5052275657653809, "learning_rate": 8.55059945039729e-05, "loss": 2.9496, "step": 3492 }, { "epoch": 0.2864668203945683, "grad_norm": 0.49946439266204834, "learning_rate": 8.548689948577589e-05, "loss": 3.0197, "step": 3494 }, { "epoch": 0.2866307968229567, "grad_norm": 0.5956889390945435, "learning_rate": 8.546779403273952e-05, "loss": 3.0002, "step": 3496 }, { "epoch": 0.2867947732513451, "grad_norm": 0.5495591163635254, "learning_rate": 8.544867815048166e-05, "loss": 2.9783, "step": 3498 }, { "epoch": 0.28695874967973356, "grad_norm": 0.5237768292427063, "learning_rate": 8.542955184462334e-05, "loss": 2.9502, "step": 3500 }, { "epoch": 0.28712272610812195, "grad_norm": 0.5177441835403442, "learning_rate": 8.541041512078856e-05, "loss": 3.031, "step": 3502 }, { "epoch": 0.2872867025365104, "grad_norm": 0.49246928095817566, "learning_rate": 8.539126798460443e-05, "loss": 3.0439, "step": 3504 }, { "epoch": 0.2874506789648988, "grad_norm": 0.4482136368751526, "learning_rate": 8.537211044170118e-05, "loss": 2.9854, "step": 3506 }, { "epoch": 0.28761465539328723, "grad_norm": 0.44845181703567505, "learning_rate": 8.535294249771195e-05, "loss": 3.0106, "step": 3508 }, { "epoch": 0.2877786318216756, "grad_norm": 0.4529576897621155, "learning_rate": 8.533376415827311e-05, "loss": 2.9691, "step": 3510 }, { "epoch": 0.28794260825006407, "grad_norm": 0.4587607979774475, "learning_rate": 8.531457542902397e-05, "loss": 3.0012, "step": 3512 }, { "epoch": 0.28810658467845246, "grad_norm": 0.46685096621513367, "learning_rate": 8.52953763156069e-05, "loss": 2.9333, "step": 3514 }, { "epoch": 0.2882705611068409, "grad_norm": 0.4905169904232025, "learning_rate": 8.527616682366743e-05, "loss": 3.0578, "step": 3516 }, { "epoch": 0.2884345375352293, "grad_norm": 0.42239660024642944, "learning_rate": 8.5256946958854e-05, "loss": 2.8983, "step": 3518 }, { "epoch": 0.28859851396361774, "grad_norm": 0.4612962007522583, "learning_rate": 8.523771672681819e-05, "loss": 3.015, "step": 3520 }, { "epoch": 0.28876249039200613, "grad_norm": 0.4948538541793823, "learning_rate": 8.521847613321461e-05, "loss": 3.0051, "step": 3522 }, { "epoch": 0.2889264668203946, "grad_norm": 0.48041197657585144, "learning_rate": 8.51992251837009e-05, "loss": 2.9799, "step": 3524 }, { "epoch": 0.28909044324878297, "grad_norm": 0.4689272344112396, "learning_rate": 8.517996388393776e-05, "loss": 2.9721, "step": 3526 }, { "epoch": 0.2892544196771714, "grad_norm": 0.49251675605773926, "learning_rate": 8.516069223958895e-05, "loss": 3.0053, "step": 3528 }, { "epoch": 0.2894183961055598, "grad_norm": 0.5214609503746033, "learning_rate": 8.514141025632121e-05, "loss": 2.9999, "step": 3530 }, { "epoch": 0.28958237253394825, "grad_norm": 0.5497042536735535, "learning_rate": 8.51221179398044e-05, "loss": 3.0013, "step": 3532 }, { "epoch": 0.28974634896233664, "grad_norm": 0.5290588140487671, "learning_rate": 8.510281529571135e-05, "loss": 2.9671, "step": 3534 }, { "epoch": 0.2899103253907251, "grad_norm": 0.5457175970077515, "learning_rate": 8.508350232971798e-05, "loss": 2.9757, "step": 3536 }, { "epoch": 0.2900743018191135, "grad_norm": 0.47880294919013977, "learning_rate": 8.506417904750321e-05, "loss": 3.0044, "step": 3538 }, { "epoch": 0.2902382782475019, "grad_norm": 0.454750120639801, "learning_rate": 8.504484545474902e-05, "loss": 2.9854, "step": 3540 }, { "epoch": 0.2904022546758903, "grad_norm": 0.5449289679527283, "learning_rate": 8.502550155714039e-05, "loss": 3.0371, "step": 3542 }, { "epoch": 0.29056623110427876, "grad_norm": 0.4773004651069641, "learning_rate": 8.500614736036536e-05, "loss": 3.0289, "step": 3544 }, { "epoch": 0.2907302075326672, "grad_norm": 0.515762984752655, "learning_rate": 8.498678287011497e-05, "loss": 3.0323, "step": 3546 }, { "epoch": 0.2908941839610556, "grad_norm": 0.502001941204071, "learning_rate": 8.496740809208332e-05, "loss": 3.0344, "step": 3548 }, { "epoch": 0.29105816038944404, "grad_norm": 0.46195709705352783, "learning_rate": 8.494802303196751e-05, "loss": 2.9701, "step": 3550 }, { "epoch": 0.29122213681783243, "grad_norm": 0.43657612800598145, "learning_rate": 8.492862769546768e-05, "loss": 2.9849, "step": 3552 }, { "epoch": 0.2913861132462209, "grad_norm": 0.4737352132797241, "learning_rate": 8.490922208828698e-05, "loss": 3.0034, "step": 3554 }, { "epoch": 0.29155008967460927, "grad_norm": 0.47753196954727173, "learning_rate": 8.488980621613157e-05, "loss": 2.9693, "step": 3556 }, { "epoch": 0.2917140661029977, "grad_norm": 0.4862592816352844, "learning_rate": 8.487038008471066e-05, "loss": 3.0158, "step": 3558 }, { "epoch": 0.2918780425313861, "grad_norm": 0.5238935351371765, "learning_rate": 8.485094369973644e-05, "loss": 3.0186, "step": 3560 }, { "epoch": 0.29204201895977455, "grad_norm": 0.47579601407051086, "learning_rate": 8.483149706692415e-05, "loss": 2.9984, "step": 3562 }, { "epoch": 0.29220599538816294, "grad_norm": 0.45539939403533936, "learning_rate": 8.481204019199203e-05, "loss": 3.0183, "step": 3564 }, { "epoch": 0.2923699718165514, "grad_norm": 0.4498404860496521, "learning_rate": 8.479257308066129e-05, "loss": 2.9848, "step": 3566 }, { "epoch": 0.2925339482449398, "grad_norm": 0.5307414531707764, "learning_rate": 8.477309573865623e-05, "loss": 3.0112, "step": 3568 }, { "epoch": 0.2926979246733282, "grad_norm": 0.4621909558773041, "learning_rate": 8.47536081717041e-05, "loss": 3.0196, "step": 3570 }, { "epoch": 0.2928619011017166, "grad_norm": 0.4816957712173462, "learning_rate": 8.473411038553518e-05, "loss": 3.037, "step": 3572 }, { "epoch": 0.29302587753010506, "grad_norm": 0.4664321541786194, "learning_rate": 8.471460238588275e-05, "loss": 2.9409, "step": 3574 }, { "epoch": 0.29318985395849345, "grad_norm": 0.5383502244949341, "learning_rate": 8.469508417848309e-05, "loss": 3.0063, "step": 3576 }, { "epoch": 0.2933538303868819, "grad_norm": 0.5679525136947632, "learning_rate": 8.467555576907547e-05, "loss": 2.9717, "step": 3578 }, { "epoch": 0.2935178068152703, "grad_norm": 0.5316475629806519, "learning_rate": 8.465601716340217e-05, "loss": 2.9873, "step": 3580 }, { "epoch": 0.29368178324365873, "grad_norm": 0.5575289726257324, "learning_rate": 8.46364683672085e-05, "loss": 3.0346, "step": 3582 }, { "epoch": 0.2938457596720471, "grad_norm": 0.49023500084877014, "learning_rate": 8.461690938624272e-05, "loss": 2.9569, "step": 3584 }, { "epoch": 0.29400973610043557, "grad_norm": 0.5434648394584656, "learning_rate": 8.45973402262561e-05, "loss": 2.9681, "step": 3586 }, { "epoch": 0.29417371252882396, "grad_norm": 0.5375614166259766, "learning_rate": 8.45777608930029e-05, "loss": 2.9967, "step": 3588 }, { "epoch": 0.2943376889572124, "grad_norm": 0.492756724357605, "learning_rate": 8.455817139224038e-05, "loss": 2.9579, "step": 3590 }, { "epoch": 0.2945016653856008, "grad_norm": 0.5231446623802185, "learning_rate": 8.453857172972881e-05, "loss": 3.0131, "step": 3592 }, { "epoch": 0.29466564181398924, "grad_norm": 0.47618967294692993, "learning_rate": 8.451896191123139e-05, "loss": 2.9707, "step": 3594 }, { "epoch": 0.29482961824237763, "grad_norm": 0.4701117277145386, "learning_rate": 8.449934194251436e-05, "loss": 2.9816, "step": 3596 }, { "epoch": 0.2949935946707661, "grad_norm": 0.5265464782714844, "learning_rate": 8.447971182934695e-05, "loss": 2.9723, "step": 3598 }, { "epoch": 0.29515757109915447, "grad_norm": 0.4834996461868286, "learning_rate": 8.446007157750129e-05, "loss": 3.0243, "step": 3600 }, { "epoch": 0.2953215475275429, "grad_norm": 0.5081709623336792, "learning_rate": 8.444042119275259e-05, "loss": 2.991, "step": 3602 }, { "epoch": 0.29548552395593136, "grad_norm": 0.4928586781024933, "learning_rate": 8.4420760680879e-05, "loss": 2.9471, "step": 3604 }, { "epoch": 0.29564950038431975, "grad_norm": 0.4916239380836487, "learning_rate": 8.440109004766164e-05, "loss": 3.0252, "step": 3606 }, { "epoch": 0.2958134768127082, "grad_norm": 0.5131044387817383, "learning_rate": 8.438140929888461e-05, "loss": 3.037, "step": 3608 }, { "epoch": 0.2959774532410966, "grad_norm": 0.5201687812805176, "learning_rate": 8.436171844033498e-05, "loss": 3.0245, "step": 3610 }, { "epoch": 0.29614142966948503, "grad_norm": 0.5022949576377869, "learning_rate": 8.434201747780281e-05, "loss": 2.9876, "step": 3612 }, { "epoch": 0.2963054060978734, "grad_norm": 0.48507484793663025, "learning_rate": 8.432230641708112e-05, "loss": 3.0006, "step": 3614 }, { "epoch": 0.29646938252626187, "grad_norm": 0.5075755715370178, "learning_rate": 8.43025852639659e-05, "loss": 2.9571, "step": 3616 }, { "epoch": 0.29663335895465026, "grad_norm": 0.5907678604125977, "learning_rate": 8.428285402425614e-05, "loss": 2.988, "step": 3618 }, { "epoch": 0.2967973353830387, "grad_norm": 0.5069214105606079, "learning_rate": 8.426311270375372e-05, "loss": 2.997, "step": 3620 }, { "epoch": 0.2969613118114271, "grad_norm": 0.48910194635391235, "learning_rate": 8.424336130826354e-05, "loss": 3.0606, "step": 3622 }, { "epoch": 0.29712528823981554, "grad_norm": 0.47257429361343384, "learning_rate": 8.422359984359347e-05, "loss": 2.9472, "step": 3624 }, { "epoch": 0.29728926466820393, "grad_norm": 0.513735830783844, "learning_rate": 8.42038283155543e-05, "loss": 2.9971, "step": 3626 }, { "epoch": 0.2974532410965924, "grad_norm": 0.4457564651966095, "learning_rate": 8.41840467299598e-05, "loss": 2.99, "step": 3628 }, { "epoch": 0.29761721752498077, "grad_norm": 0.46787014603614807, "learning_rate": 8.416425509262673e-05, "loss": 3.04, "step": 3630 }, { "epoch": 0.2977811939533692, "grad_norm": 0.4722326397895813, "learning_rate": 8.414445340937474e-05, "loss": 2.9925, "step": 3632 }, { "epoch": 0.2979451703817576, "grad_norm": 0.48793745040893555, "learning_rate": 8.41246416860265e-05, "loss": 2.9601, "step": 3634 }, { "epoch": 0.29810914681014605, "grad_norm": 0.506376326084137, "learning_rate": 8.410481992840757e-05, "loss": 2.9581, "step": 3636 }, { "epoch": 0.29827312323853444, "grad_norm": 0.48079127073287964, "learning_rate": 8.408498814234649e-05, "loss": 2.9846, "step": 3638 }, { "epoch": 0.2984370996669229, "grad_norm": 0.49062538146972656, "learning_rate": 8.406514633367478e-05, "loss": 2.9784, "step": 3640 }, { "epoch": 0.2986010760953113, "grad_norm": 0.4508991837501526, "learning_rate": 8.404529450822687e-05, "loss": 2.9467, "step": 3642 }, { "epoch": 0.2987650525236997, "grad_norm": 0.49012279510498047, "learning_rate": 8.402543267184012e-05, "loss": 3.0089, "step": 3644 }, { "epoch": 0.2989290289520881, "grad_norm": 0.48352885246276855, "learning_rate": 8.400556083035487e-05, "loss": 2.9665, "step": 3646 }, { "epoch": 0.29909300538047656, "grad_norm": 0.48871710896492004, "learning_rate": 8.39856789896144e-05, "loss": 2.9501, "step": 3648 }, { "epoch": 0.29925698180886495, "grad_norm": 0.5270124673843384, "learning_rate": 8.396578715546489e-05, "loss": 2.9884, "step": 3650 }, { "epoch": 0.2994209582372534, "grad_norm": 0.5761973261833191, "learning_rate": 8.394588533375551e-05, "loss": 2.9354, "step": 3652 }, { "epoch": 0.2995849346656418, "grad_norm": 0.5598081946372986, "learning_rate": 8.392597353033836e-05, "loss": 2.9783, "step": 3654 }, { "epoch": 0.29974891109403023, "grad_norm": 0.6018843054771423, "learning_rate": 8.39060517510684e-05, "loss": 2.9262, "step": 3656 }, { "epoch": 0.2999128875224186, "grad_norm": 0.5428453683853149, "learning_rate": 8.388612000180365e-05, "loss": 2.9805, "step": 3658 }, { "epoch": 0.30007686395080707, "grad_norm": 0.5947820544242859, "learning_rate": 8.386617828840495e-05, "loss": 3.0226, "step": 3660 }, { "epoch": 0.3002408403791955, "grad_norm": 0.5261856913566589, "learning_rate": 8.384622661673613e-05, "loss": 3.0118, "step": 3662 }, { "epoch": 0.3004048168075839, "grad_norm": 0.5318114757537842, "learning_rate": 8.382626499266393e-05, "loss": 3.0048, "step": 3664 }, { "epoch": 0.30056879323597235, "grad_norm": 0.5498486161231995, "learning_rate": 8.380629342205802e-05, "loss": 3.0076, "step": 3666 }, { "epoch": 0.30073276966436074, "grad_norm": 0.5416869521141052, "learning_rate": 8.378631191079098e-05, "loss": 3.0086, "step": 3668 }, { "epoch": 0.3008967460927492, "grad_norm": 0.5416050553321838, "learning_rate": 8.376632046473836e-05, "loss": 3.0309, "step": 3670 }, { "epoch": 0.3010607225211376, "grad_norm": 0.5648167133331299, "learning_rate": 8.374631908977856e-05, "loss": 3.0105, "step": 3672 }, { "epoch": 0.301224698949526, "grad_norm": 0.5651098489761353, "learning_rate": 8.372630779179295e-05, "loss": 2.9895, "step": 3674 }, { "epoch": 0.3013886753779144, "grad_norm": 0.5542349219322205, "learning_rate": 8.370628657666581e-05, "loss": 2.9952, "step": 3676 }, { "epoch": 0.30155265180630286, "grad_norm": 0.5925988554954529, "learning_rate": 8.368625545028435e-05, "loss": 2.9552, "step": 3678 }, { "epoch": 0.30171662823469125, "grad_norm": 0.5026484131813049, "learning_rate": 8.366621441853864e-05, "loss": 2.9809, "step": 3680 }, { "epoch": 0.3018806046630797, "grad_norm": 0.54961097240448, "learning_rate": 8.36461634873217e-05, "loss": 3.0241, "step": 3682 }, { "epoch": 0.3020445810914681, "grad_norm": 0.539864182472229, "learning_rate": 8.362610266252948e-05, "loss": 2.9108, "step": 3684 }, { "epoch": 0.30220855751985654, "grad_norm": 0.5144716501235962, "learning_rate": 8.360603195006081e-05, "loss": 2.9453, "step": 3686 }, { "epoch": 0.3023725339482449, "grad_norm": 0.5098161697387695, "learning_rate": 8.358595135581746e-05, "loss": 2.9514, "step": 3688 }, { "epoch": 0.30253651037663337, "grad_norm": 0.5552378296852112, "learning_rate": 8.356586088570402e-05, "loss": 2.9992, "step": 3690 }, { "epoch": 0.30270048680502176, "grad_norm": 0.5220159888267517, "learning_rate": 8.354576054562812e-05, "loss": 2.949, "step": 3692 }, { "epoch": 0.3028644632334102, "grad_norm": 0.5612363219261169, "learning_rate": 8.352565034150015e-05, "loss": 2.9787, "step": 3694 }, { "epoch": 0.3030284396617986, "grad_norm": 0.620829701423645, "learning_rate": 8.350553027923354e-05, "loss": 2.948, "step": 3696 }, { "epoch": 0.30319241609018704, "grad_norm": 0.5301763415336609, "learning_rate": 8.348540036474445e-05, "loss": 2.899, "step": 3698 }, { "epoch": 0.30335639251857544, "grad_norm": 0.5472487807273865, "learning_rate": 8.346526060395214e-05, "loss": 2.9334, "step": 3700 }, { "epoch": 0.3035203689469639, "grad_norm": 0.4763239920139313, "learning_rate": 8.34451110027786e-05, "loss": 2.9492, "step": 3702 }, { "epoch": 0.30368434537535227, "grad_norm": 0.4982868432998657, "learning_rate": 8.342495156714877e-05, "loss": 2.9982, "step": 3704 }, { "epoch": 0.3038483218037407, "grad_norm": 0.4549109935760498, "learning_rate": 8.34047823029905e-05, "loss": 2.9983, "step": 3706 }, { "epoch": 0.3040122982321291, "grad_norm": 0.4498273730278015, "learning_rate": 8.338460321623453e-05, "loss": 2.9843, "step": 3708 }, { "epoch": 0.30417627466051755, "grad_norm": 0.4424727261066437, "learning_rate": 8.336441431281443e-05, "loss": 2.9798, "step": 3710 }, { "epoch": 0.30434025108890594, "grad_norm": 0.4585806131362915, "learning_rate": 8.334421559866675e-05, "loss": 2.9663, "step": 3712 }, { "epoch": 0.3045042275172944, "grad_norm": 0.4909418523311615, "learning_rate": 8.332400707973082e-05, "loss": 2.9489, "step": 3714 }, { "epoch": 0.3046682039456828, "grad_norm": 0.49454036355018616, "learning_rate": 8.330378876194896e-05, "loss": 2.9942, "step": 3716 }, { "epoch": 0.3048321803740712, "grad_norm": 0.4984138011932373, "learning_rate": 8.328356065126628e-05, "loss": 2.9846, "step": 3718 }, { "epoch": 0.3049961568024597, "grad_norm": 0.5394349694252014, "learning_rate": 8.326332275363085e-05, "loss": 2.9815, "step": 3720 }, { "epoch": 0.30516013323084806, "grad_norm": 0.5089222192764282, "learning_rate": 8.324307507499352e-05, "loss": 3.0284, "step": 3722 }, { "epoch": 0.3053241096592365, "grad_norm": 0.5222364664077759, "learning_rate": 8.322281762130813e-05, "loss": 3.0041, "step": 3724 }, { "epoch": 0.3054880860876249, "grad_norm": 0.5485755801200867, "learning_rate": 8.320255039853128e-05, "loss": 2.9494, "step": 3726 }, { "epoch": 0.30565206251601335, "grad_norm": 0.664840579032898, "learning_rate": 8.318227341262254e-05, "loss": 2.9965, "step": 3728 }, { "epoch": 0.30581603894440174, "grad_norm": 0.5262017846107483, "learning_rate": 8.316198666954431e-05, "loss": 2.9494, "step": 3730 }, { "epoch": 0.3059800153727902, "grad_norm": 0.504371166229248, "learning_rate": 8.314169017526185e-05, "loss": 2.9769, "step": 3732 }, { "epoch": 0.30614399180117857, "grad_norm": 0.4443201720714569, "learning_rate": 8.312138393574328e-05, "loss": 2.9735, "step": 3734 }, { "epoch": 0.306307968229567, "grad_norm": 0.4749385714530945, "learning_rate": 8.310106795695962e-05, "loss": 2.9146, "step": 3736 }, { "epoch": 0.3064719446579554, "grad_norm": 0.4916543662548065, "learning_rate": 8.308074224488473e-05, "loss": 3.0123, "step": 3738 }, { "epoch": 0.30663592108634385, "grad_norm": 0.47765418887138367, "learning_rate": 8.306040680549534e-05, "loss": 2.9993, "step": 3740 }, { "epoch": 0.30679989751473224, "grad_norm": 0.4428040385246277, "learning_rate": 8.304006164477105e-05, "loss": 2.9951, "step": 3742 }, { "epoch": 0.3069638739431207, "grad_norm": 0.48747119307518005, "learning_rate": 8.301970676869428e-05, "loss": 2.9786, "step": 3744 }, { "epoch": 0.3071278503715091, "grad_norm": 0.47519078850746155, "learning_rate": 8.299934218325036e-05, "loss": 3.0331, "step": 3746 }, { "epoch": 0.3072918267998975, "grad_norm": 0.5639200806617737, "learning_rate": 8.297896789442742e-05, "loss": 2.9629, "step": 3748 }, { "epoch": 0.3074558032282859, "grad_norm": 0.4715816080570221, "learning_rate": 8.295858390821651e-05, "loss": 2.9719, "step": 3750 }, { "epoch": 0.30761977965667436, "grad_norm": 0.46770063042640686, "learning_rate": 8.293819023061145e-05, "loss": 2.934, "step": 3752 }, { "epoch": 0.30778375608506275, "grad_norm": 0.47951412200927734, "learning_rate": 8.291778686760899e-05, "loss": 2.9206, "step": 3754 }, { "epoch": 0.3079477325134512, "grad_norm": 0.5599160194396973, "learning_rate": 8.289737382520868e-05, "loss": 2.9962, "step": 3756 }, { "epoch": 0.3081117089418396, "grad_norm": 0.4405677318572998, "learning_rate": 8.287695110941292e-05, "loss": 2.9984, "step": 3758 }, { "epoch": 0.30827568537022804, "grad_norm": 0.4612639844417572, "learning_rate": 8.285651872622695e-05, "loss": 2.9552, "step": 3760 }, { "epoch": 0.3084396617986164, "grad_norm": 0.5191618800163269, "learning_rate": 8.283607668165888e-05, "loss": 2.9771, "step": 3762 }, { "epoch": 0.3086036382270049, "grad_norm": 0.4907604157924652, "learning_rate": 8.281562498171964e-05, "loss": 2.9342, "step": 3764 }, { "epoch": 0.30876761465539326, "grad_norm": 0.46844518184661865, "learning_rate": 8.279516363242302e-05, "loss": 2.97, "step": 3766 }, { "epoch": 0.3089315910837817, "grad_norm": 0.5155321955680847, "learning_rate": 8.277469263978562e-05, "loss": 2.9505, "step": 3768 }, { "epoch": 0.3090955675121701, "grad_norm": 0.4812948405742645, "learning_rate": 8.275421200982689e-05, "loss": 2.9261, "step": 3770 }, { "epoch": 0.30925954394055855, "grad_norm": 0.4431648254394531, "learning_rate": 8.273372174856911e-05, "loss": 3.0066, "step": 3772 }, { "epoch": 0.309423520368947, "grad_norm": 0.5174999237060547, "learning_rate": 8.271322186203739e-05, "loss": 2.9845, "step": 3774 }, { "epoch": 0.3095874967973354, "grad_norm": 0.5325742363929749, "learning_rate": 8.269271235625965e-05, "loss": 2.891, "step": 3776 }, { "epoch": 0.30975147322572383, "grad_norm": 0.49371999502182007, "learning_rate": 8.26721932372667e-05, "loss": 2.9165, "step": 3778 }, { "epoch": 0.3099154496541122, "grad_norm": 0.46111825108528137, "learning_rate": 8.265166451109213e-05, "loss": 2.9865, "step": 3780 }, { "epoch": 0.31007942608250066, "grad_norm": 0.49692320823669434, "learning_rate": 8.263112618377235e-05, "loss": 2.8851, "step": 3782 }, { "epoch": 0.31024340251088905, "grad_norm": 0.5122151970863342, "learning_rate": 8.261057826134664e-05, "loss": 3.0221, "step": 3784 }, { "epoch": 0.3104073789392775, "grad_norm": 0.4640101194381714, "learning_rate": 8.259002074985703e-05, "loss": 3.0407, "step": 3786 }, { "epoch": 0.3105713553676659, "grad_norm": 0.4947657585144043, "learning_rate": 8.256945365534841e-05, "loss": 2.9391, "step": 3788 }, { "epoch": 0.31073533179605434, "grad_norm": 0.5168522596359253, "learning_rate": 8.254887698386851e-05, "loss": 2.9593, "step": 3790 }, { "epoch": 0.3108993082244427, "grad_norm": 0.5186290740966797, "learning_rate": 8.252829074146784e-05, "loss": 3.0004, "step": 3792 }, { "epoch": 0.3110632846528312, "grad_norm": 0.5075885653495789, "learning_rate": 8.250769493419973e-05, "loss": 2.9619, "step": 3794 }, { "epoch": 0.31122726108121956, "grad_norm": 0.49640703201293945, "learning_rate": 8.248708956812033e-05, "loss": 3.0015, "step": 3796 }, { "epoch": 0.311391237509608, "grad_norm": 0.4813206195831299, "learning_rate": 8.246647464928863e-05, "loss": 2.9537, "step": 3798 }, { "epoch": 0.3115552139379964, "grad_norm": 0.48675093054771423, "learning_rate": 8.244585018376634e-05, "loss": 2.9733, "step": 3800 }, { "epoch": 0.31171919036638485, "grad_norm": 0.48205146193504333, "learning_rate": 8.24252161776181e-05, "loss": 2.9532, "step": 3802 }, { "epoch": 0.31188316679477324, "grad_norm": 0.4845637381076813, "learning_rate": 8.240457263691123e-05, "loss": 2.9559, "step": 3804 }, { "epoch": 0.3120471432231617, "grad_norm": 0.4753011465072632, "learning_rate": 8.238391956771595e-05, "loss": 2.8951, "step": 3806 }, { "epoch": 0.3122111196515501, "grad_norm": 0.47336089611053467, "learning_rate": 8.236325697610526e-05, "loss": 2.8971, "step": 3808 }, { "epoch": 0.3123750960799385, "grad_norm": 0.539230227470398, "learning_rate": 8.23425848681549e-05, "loss": 2.9824, "step": 3810 }, { "epoch": 0.3125390725083269, "grad_norm": 0.49993154406547546, "learning_rate": 8.232190324994348e-05, "loss": 2.9683, "step": 3812 }, { "epoch": 0.31270304893671536, "grad_norm": 0.46976861357688904, "learning_rate": 8.23012121275524e-05, "loss": 2.9599, "step": 3814 }, { "epoch": 0.31286702536510375, "grad_norm": 0.47436973452568054, "learning_rate": 8.228051150706582e-05, "loss": 2.9725, "step": 3816 }, { "epoch": 0.3130310017934922, "grad_norm": 0.4988282024860382, "learning_rate": 8.22598013945707e-05, "loss": 2.9868, "step": 3818 }, { "epoch": 0.3131949782218806, "grad_norm": 0.4894435405731201, "learning_rate": 8.223908179615681e-05, "loss": 2.936, "step": 3820 }, { "epoch": 0.31335895465026903, "grad_norm": 0.45384493470191956, "learning_rate": 8.22183527179167e-05, "loss": 2.9694, "step": 3822 }, { "epoch": 0.3135229310786574, "grad_norm": 0.4598720967769623, "learning_rate": 8.219761416594569e-05, "loss": 2.9709, "step": 3824 }, { "epoch": 0.31368690750704586, "grad_norm": 0.47938069701194763, "learning_rate": 8.217686614634194e-05, "loss": 2.9953, "step": 3826 }, { "epoch": 0.31385088393543426, "grad_norm": 0.473412424325943, "learning_rate": 8.215610866520631e-05, "loss": 3.0166, "step": 3828 }, { "epoch": 0.3140148603638227, "grad_norm": 0.4905776381492615, "learning_rate": 8.213534172864252e-05, "loss": 2.899, "step": 3830 }, { "epoch": 0.31417883679221115, "grad_norm": 0.4890548288822174, "learning_rate": 8.211456534275702e-05, "loss": 2.9694, "step": 3832 }, { "epoch": 0.31434281322059954, "grad_norm": 0.4220986068248749, "learning_rate": 8.209377951365908e-05, "loss": 2.9941, "step": 3834 }, { "epoch": 0.314506789648988, "grad_norm": 0.4571814239025116, "learning_rate": 8.207298424746071e-05, "loss": 2.987, "step": 3836 }, { "epoch": 0.3146707660773764, "grad_norm": 0.4578603208065033, "learning_rate": 8.20521795502767e-05, "loss": 3.0315, "step": 3838 }, { "epoch": 0.3148347425057648, "grad_norm": 0.49688267707824707, "learning_rate": 8.203136542822464e-05, "loss": 2.981, "step": 3840 }, { "epoch": 0.3149987189341532, "grad_norm": 0.4531402885913849, "learning_rate": 8.201054188742485e-05, "loss": 2.989, "step": 3842 }, { "epoch": 0.31516269536254166, "grad_norm": 0.4830644130706787, "learning_rate": 8.198970893400047e-05, "loss": 2.9532, "step": 3844 }, { "epoch": 0.31532667179093005, "grad_norm": 0.4996969997882843, "learning_rate": 8.196886657407739e-05, "loss": 2.968, "step": 3846 }, { "epoch": 0.3154906482193185, "grad_norm": 0.48447179794311523, "learning_rate": 8.194801481378421e-05, "loss": 2.9828, "step": 3848 }, { "epoch": 0.3156546246477069, "grad_norm": 0.4997907280921936, "learning_rate": 8.192715365925239e-05, "loss": 2.9826, "step": 3850 }, { "epoch": 0.31581860107609533, "grad_norm": 0.4326731860637665, "learning_rate": 8.190628311661607e-05, "loss": 2.9688, "step": 3852 }, { "epoch": 0.3159825775044837, "grad_norm": 0.45658349990844727, "learning_rate": 8.188540319201219e-05, "loss": 3.0064, "step": 3854 }, { "epoch": 0.31614655393287217, "grad_norm": 0.4541572034358978, "learning_rate": 8.186451389158045e-05, "loss": 3.0013, "step": 3856 }, { "epoch": 0.31631053036126056, "grad_norm": 0.4665530025959015, "learning_rate": 8.184361522146331e-05, "loss": 2.9502, "step": 3858 }, { "epoch": 0.316474506789649, "grad_norm": 0.4613116979598999, "learning_rate": 8.182270718780596e-05, "loss": 2.9217, "step": 3860 }, { "epoch": 0.3166384832180374, "grad_norm": 0.46976956725120544, "learning_rate": 8.180178979675635e-05, "loss": 2.9366, "step": 3862 }, { "epoch": 0.31680245964642584, "grad_norm": 0.5204563736915588, "learning_rate": 8.178086305446522e-05, "loss": 3.059, "step": 3864 }, { "epoch": 0.31696643607481423, "grad_norm": 0.5135278105735779, "learning_rate": 8.1759926967086e-05, "loss": 2.8777, "step": 3866 }, { "epoch": 0.3171304125032027, "grad_norm": 0.49763861298561096, "learning_rate": 8.173898154077491e-05, "loss": 2.9745, "step": 3868 }, { "epoch": 0.31729438893159106, "grad_norm": 0.4697003662586212, "learning_rate": 8.17180267816909e-05, "loss": 2.9959, "step": 3870 }, { "epoch": 0.3174583653599795, "grad_norm": 0.47999435663223267, "learning_rate": 8.169706269599567e-05, "loss": 2.9694, "step": 3872 }, { "epoch": 0.3176223417883679, "grad_norm": 0.47604313492774963, "learning_rate": 8.167608928985364e-05, "loss": 2.9576, "step": 3874 }, { "epoch": 0.31778631821675635, "grad_norm": 0.5373707413673401, "learning_rate": 8.165510656943203e-05, "loss": 2.9864, "step": 3876 }, { "epoch": 0.31795029464514474, "grad_norm": 0.47880983352661133, "learning_rate": 8.163411454090073e-05, "loss": 2.9901, "step": 3878 }, { "epoch": 0.3181142710735332, "grad_norm": 0.45610833168029785, "learning_rate": 8.161311321043241e-05, "loss": 2.9617, "step": 3880 }, { "epoch": 0.3182782475019216, "grad_norm": 0.47816580533981323, "learning_rate": 8.159210258420247e-05, "loss": 2.9654, "step": 3882 }, { "epoch": 0.31844222393031, "grad_norm": 0.45326411724090576, "learning_rate": 8.157108266838902e-05, "loss": 2.9668, "step": 3884 }, { "epoch": 0.3186062003586984, "grad_norm": 0.4615935683250427, "learning_rate": 8.15500534691729e-05, "loss": 2.9791, "step": 3886 }, { "epoch": 0.31877017678708686, "grad_norm": 0.439890593290329, "learning_rate": 8.152901499273774e-05, "loss": 3.0039, "step": 3888 }, { "epoch": 0.3189341532154753, "grad_norm": 0.4957211911678314, "learning_rate": 8.150796724526982e-05, "loss": 2.9812, "step": 3890 }, { "epoch": 0.3190981296438637, "grad_norm": 0.49575918912887573, "learning_rate": 8.148691023295818e-05, "loss": 2.9451, "step": 3892 }, { "epoch": 0.31926210607225214, "grad_norm": 0.4739311635494232, "learning_rate": 8.14658439619946e-05, "loss": 2.9739, "step": 3894 }, { "epoch": 0.31942608250064053, "grad_norm": 0.4505142867565155, "learning_rate": 8.144476843857358e-05, "loss": 2.9556, "step": 3896 }, { "epoch": 0.319590058929029, "grad_norm": 0.507182776927948, "learning_rate": 8.14236836688923e-05, "loss": 2.9812, "step": 3898 }, { "epoch": 0.31975403535741737, "grad_norm": 0.5078046321868896, "learning_rate": 8.14025896591507e-05, "loss": 2.9509, "step": 3900 }, { "epoch": 0.3199180117858058, "grad_norm": 0.5385080575942993, "learning_rate": 8.138148641555143e-05, "loss": 2.9495, "step": 3902 }, { "epoch": 0.3200819882141942, "grad_norm": 0.508383572101593, "learning_rate": 8.136037394429982e-05, "loss": 2.9564, "step": 3904 }, { "epoch": 0.32024596464258265, "grad_norm": 0.4714992046356201, "learning_rate": 8.133925225160399e-05, "loss": 2.9736, "step": 3906 }, { "epoch": 0.32040994107097104, "grad_norm": 0.4709402918815613, "learning_rate": 8.13181213436747e-05, "loss": 2.9441, "step": 3908 }, { "epoch": 0.3205739174993595, "grad_norm": 0.4701831340789795, "learning_rate": 8.129698122672543e-05, "loss": 2.9284, "step": 3910 }, { "epoch": 0.3207378939277479, "grad_norm": 0.46206173300743103, "learning_rate": 8.12758319069724e-05, "loss": 2.9342, "step": 3912 }, { "epoch": 0.3209018703561363, "grad_norm": 0.6080299615859985, "learning_rate": 8.125467339063452e-05, "loss": 2.9444, "step": 3914 }, { "epoch": 0.3210658467845247, "grad_norm": 0.5376178026199341, "learning_rate": 8.123350568393338e-05, "loss": 2.964, "step": 3916 }, { "epoch": 0.32122982321291316, "grad_norm": 0.46582067012786865, "learning_rate": 8.12123287930933e-05, "loss": 2.9762, "step": 3918 }, { "epoch": 0.32139379964130155, "grad_norm": 0.4999258816242218, "learning_rate": 8.119114272434134e-05, "loss": 2.914, "step": 3920 }, { "epoch": 0.32155777606969, "grad_norm": 0.45092910528182983, "learning_rate": 8.116994748390714e-05, "loss": 2.9802, "step": 3922 }, { "epoch": 0.3217217524980784, "grad_norm": 0.4431985318660736, "learning_rate": 8.114874307802316e-05, "loss": 2.972, "step": 3924 }, { "epoch": 0.32188572892646683, "grad_norm": 0.49656012654304504, "learning_rate": 8.11275295129245e-05, "loss": 2.9692, "step": 3926 }, { "epoch": 0.3220497053548552, "grad_norm": 0.528873085975647, "learning_rate": 8.110630679484896e-05, "loss": 2.9603, "step": 3928 }, { "epoch": 0.32221368178324367, "grad_norm": 0.4573817849159241, "learning_rate": 8.1085074930037e-05, "loss": 2.9092, "step": 3930 }, { "epoch": 0.32237765821163206, "grad_norm": 0.536751925945282, "learning_rate": 8.106383392473185e-05, "loss": 2.9176, "step": 3932 }, { "epoch": 0.3225416346400205, "grad_norm": 0.5677569508552551, "learning_rate": 8.104258378517932e-05, "loss": 2.9718, "step": 3934 }, { "epoch": 0.3227056110684089, "grad_norm": 0.5534135103225708, "learning_rate": 8.1021324517628e-05, "loss": 2.9247, "step": 3936 }, { "epoch": 0.32286958749679734, "grad_norm": 0.48747488856315613, "learning_rate": 8.100005612832915e-05, "loss": 2.9566, "step": 3938 }, { "epoch": 0.32303356392518573, "grad_norm": 0.5087941884994507, "learning_rate": 8.097877862353664e-05, "loss": 2.9372, "step": 3940 }, { "epoch": 0.3231975403535742, "grad_norm": 0.5312908291816711, "learning_rate": 8.09574920095071e-05, "loss": 2.9734, "step": 3942 }, { "epoch": 0.32336151678196257, "grad_norm": 0.48324260115623474, "learning_rate": 8.09361962924998e-05, "loss": 2.9864, "step": 3944 }, { "epoch": 0.323525493210351, "grad_norm": 0.4854391813278198, "learning_rate": 8.09148914787767e-05, "loss": 3.0042, "step": 3946 }, { "epoch": 0.32368946963873946, "grad_norm": 0.4973825514316559, "learning_rate": 8.08935775746024e-05, "loss": 2.9871, "step": 3948 }, { "epoch": 0.32385344606712785, "grad_norm": 0.44147545099258423, "learning_rate": 8.087225458624425e-05, "loss": 2.8988, "step": 3950 }, { "epoch": 0.3240174224955163, "grad_norm": 0.462311714887619, "learning_rate": 8.085092251997221e-05, "loss": 2.9795, "step": 3952 }, { "epoch": 0.3241813989239047, "grad_norm": 0.5237289071083069, "learning_rate": 8.082958138205892e-05, "loss": 3.0, "step": 3954 }, { "epoch": 0.32434537535229313, "grad_norm": 0.47140932083129883, "learning_rate": 8.080823117877968e-05, "loss": 2.8735, "step": 3956 }, { "epoch": 0.3245093517806815, "grad_norm": 0.4941592514514923, "learning_rate": 8.078687191641248e-05, "loss": 2.9496, "step": 3958 }, { "epoch": 0.32467332820906997, "grad_norm": 0.5430725812911987, "learning_rate": 8.076550360123796e-05, "loss": 2.907, "step": 3960 }, { "epoch": 0.32483730463745836, "grad_norm": 0.5308889150619507, "learning_rate": 8.074412623953942e-05, "loss": 2.9895, "step": 3962 }, { "epoch": 0.3250012810658468, "grad_norm": 0.46821293234825134, "learning_rate": 8.072273983760282e-05, "loss": 2.9611, "step": 3964 }, { "epoch": 0.3251652574942352, "grad_norm": 0.4651613235473633, "learning_rate": 8.070134440171679e-05, "loss": 2.934, "step": 3966 }, { "epoch": 0.32532923392262364, "grad_norm": 0.5212334990501404, "learning_rate": 8.06799399381726e-05, "loss": 2.9629, "step": 3968 }, { "epoch": 0.32549321035101203, "grad_norm": 0.495918869972229, "learning_rate": 8.065852645326419e-05, "loss": 3.0066, "step": 3970 }, { "epoch": 0.3256571867794005, "grad_norm": 0.46576234698295593, "learning_rate": 8.063710395328812e-05, "loss": 2.9201, "step": 3972 }, { "epoch": 0.32582116320778887, "grad_norm": 0.495717316865921, "learning_rate": 8.061567244454368e-05, "loss": 2.9373, "step": 3974 }, { "epoch": 0.3259851396361773, "grad_norm": 0.4994613826274872, "learning_rate": 8.05942319333327e-05, "loss": 2.9653, "step": 3976 }, { "epoch": 0.3261491160645657, "grad_norm": 0.46741795539855957, "learning_rate": 8.057278242595974e-05, "loss": 2.9488, "step": 3978 }, { "epoch": 0.32631309249295415, "grad_norm": 0.5505892038345337, "learning_rate": 8.055132392873196e-05, "loss": 2.9583, "step": 3980 }, { "epoch": 0.32647706892134254, "grad_norm": 0.45577502250671387, "learning_rate": 8.052985644795918e-05, "loss": 2.8944, "step": 3982 }, { "epoch": 0.326641045349731, "grad_norm": 0.4663446843624115, "learning_rate": 8.050837998995389e-05, "loss": 2.9601, "step": 3984 }, { "epoch": 0.3268050217781194, "grad_norm": 0.5026230812072754, "learning_rate": 8.048689456103118e-05, "loss": 2.9505, "step": 3986 }, { "epoch": 0.3269689982065078, "grad_norm": 0.5132330060005188, "learning_rate": 8.046540016750877e-05, "loss": 2.9677, "step": 3988 }, { "epoch": 0.3271329746348962, "grad_norm": 0.45152610540390015, "learning_rate": 8.044389681570705e-05, "loss": 2.9187, "step": 3990 }, { "epoch": 0.32729695106328466, "grad_norm": 0.5075539350509644, "learning_rate": 8.042238451194905e-05, "loss": 3.0216, "step": 3992 }, { "epoch": 0.32746092749167305, "grad_norm": 0.44997990131378174, "learning_rate": 8.040086326256038e-05, "loss": 2.9747, "step": 3994 }, { "epoch": 0.3276249039200615, "grad_norm": 0.4499579966068268, "learning_rate": 8.037933307386932e-05, "loss": 2.9434, "step": 3996 }, { "epoch": 0.3277888803484499, "grad_norm": 0.5278234481811523, "learning_rate": 8.035779395220678e-05, "loss": 2.9539, "step": 3998 }, { "epoch": 0.32795285677683833, "grad_norm": 0.5585642457008362, "learning_rate": 8.03362459039063e-05, "loss": 2.9796, "step": 4000 }, { "epoch": 0.3281168332052267, "grad_norm": 0.6120083928108215, "learning_rate": 8.031468893530399e-05, "loss": 2.9489, "step": 4002 }, { "epoch": 0.32828080963361517, "grad_norm": 0.5297592878341675, "learning_rate": 8.029312305273868e-05, "loss": 2.9691, "step": 4004 }, { "epoch": 0.3284447860620036, "grad_norm": 0.5784056186676025, "learning_rate": 8.027154826255172e-05, "loss": 2.9629, "step": 4006 }, { "epoch": 0.328608762490392, "grad_norm": 0.5364445447921753, "learning_rate": 8.024996457108716e-05, "loss": 2.9363, "step": 4008 }, { "epoch": 0.32877273891878045, "grad_norm": 0.5013018250465393, "learning_rate": 8.022837198469162e-05, "loss": 2.9376, "step": 4010 }, { "epoch": 0.32893671534716884, "grad_norm": 0.5268305540084839, "learning_rate": 8.020677050971435e-05, "loss": 2.9699, "step": 4012 }, { "epoch": 0.3291006917755573, "grad_norm": 0.5022383332252502, "learning_rate": 8.018516015250721e-05, "loss": 2.9699, "step": 4014 }, { "epoch": 0.3292646682039457, "grad_norm": 0.4535817503929138, "learning_rate": 8.016354091942469e-05, "loss": 3.0048, "step": 4016 }, { "epoch": 0.3294286446323341, "grad_norm": 0.5175948739051819, "learning_rate": 8.014191281682384e-05, "loss": 2.974, "step": 4018 }, { "epoch": 0.3295926210607225, "grad_norm": 0.4674460291862488, "learning_rate": 8.012027585106439e-05, "loss": 2.9827, "step": 4020 }, { "epoch": 0.32975659748911096, "grad_norm": 0.4415198564529419, "learning_rate": 8.009863002850863e-05, "loss": 2.9546, "step": 4022 }, { "epoch": 0.32992057391749935, "grad_norm": 0.4976121187210083, "learning_rate": 8.007697535552143e-05, "loss": 2.9473, "step": 4024 }, { "epoch": 0.3300845503458878, "grad_norm": 0.475236177444458, "learning_rate": 8.005531183847035e-05, "loss": 2.9237, "step": 4026 }, { "epoch": 0.3302485267742762, "grad_norm": 0.437836617231369, "learning_rate": 8.003363948372547e-05, "loss": 2.9309, "step": 4028 }, { "epoch": 0.33041250320266463, "grad_norm": 0.4106907844543457, "learning_rate": 8.001195829765948e-05, "loss": 2.9151, "step": 4030 }, { "epoch": 0.330576479631053, "grad_norm": 0.4589502215385437, "learning_rate": 7.999026828664771e-05, "loss": 2.9092, "step": 4032 }, { "epoch": 0.33074045605944147, "grad_norm": 0.5295969247817993, "learning_rate": 7.996856945706804e-05, "loss": 2.9827, "step": 4034 }, { "epoch": 0.33090443248782986, "grad_norm": 0.46702131628990173, "learning_rate": 7.994686181530095e-05, "loss": 2.9661, "step": 4036 }, { "epoch": 0.3310684089162183, "grad_norm": 0.4186733365058899, "learning_rate": 7.992514536772954e-05, "loss": 2.9055, "step": 4038 }, { "epoch": 0.3312323853446067, "grad_norm": 0.46188080310821533, "learning_rate": 7.990342012073948e-05, "loss": 2.8955, "step": 4040 }, { "epoch": 0.33139636177299514, "grad_norm": 0.5356650352478027, "learning_rate": 7.988168608071901e-05, "loss": 2.9707, "step": 4042 }, { "epoch": 0.33156033820138353, "grad_norm": 0.5137851238250732, "learning_rate": 7.9859943254059e-05, "loss": 2.9509, "step": 4044 }, { "epoch": 0.331724314629772, "grad_norm": 0.4819590151309967, "learning_rate": 7.983819164715286e-05, "loss": 2.9202, "step": 4046 }, { "epoch": 0.33188829105816037, "grad_norm": 0.5087484121322632, "learning_rate": 7.98164312663966e-05, "loss": 2.9112, "step": 4048 }, { "epoch": 0.3320522674865488, "grad_norm": 0.49762898683547974, "learning_rate": 7.97946621181888e-05, "loss": 2.8998, "step": 4050 }, { "epoch": 0.3322162439149372, "grad_norm": 0.53311687707901, "learning_rate": 7.977288420893065e-05, "loss": 2.9138, "step": 4052 }, { "epoch": 0.33238022034332565, "grad_norm": 0.5476859211921692, "learning_rate": 7.975109754502588e-05, "loss": 2.9175, "step": 4054 }, { "epoch": 0.33254419677171404, "grad_norm": 0.5419086217880249, "learning_rate": 7.972930213288079e-05, "loss": 2.9735, "step": 4056 }, { "epoch": 0.3327081732001025, "grad_norm": 0.5330222845077515, "learning_rate": 7.970749797890432e-05, "loss": 2.9302, "step": 4058 }, { "epoch": 0.33287214962849093, "grad_norm": 0.511711597442627, "learning_rate": 7.968568508950786e-05, "loss": 2.9859, "step": 4060 }, { "epoch": 0.3330361260568793, "grad_norm": 0.46439769864082336, "learning_rate": 7.96638634711055e-05, "loss": 2.9397, "step": 4062 }, { "epoch": 0.33320010248526777, "grad_norm": 0.4752715528011322, "learning_rate": 7.96420331301138e-05, "loss": 2.939, "step": 4064 }, { "epoch": 0.33336407891365616, "grad_norm": 0.4505968689918518, "learning_rate": 7.962019407295194e-05, "loss": 2.8908, "step": 4066 }, { "epoch": 0.3335280553420446, "grad_norm": 0.47633254528045654, "learning_rate": 7.959834630604164e-05, "loss": 2.9235, "step": 4068 }, { "epoch": 0.333692031770433, "grad_norm": 0.460891991853714, "learning_rate": 7.957648983580718e-05, "loss": 2.9573, "step": 4070 }, { "epoch": 0.33385600819882144, "grad_norm": 0.4335838854312897, "learning_rate": 7.955462466867539e-05, "loss": 2.9811, "step": 4072 }, { "epoch": 0.33401998462720983, "grad_norm": 0.4745972752571106, "learning_rate": 7.95327508110757e-05, "loss": 2.9383, "step": 4074 }, { "epoch": 0.3341839610555983, "grad_norm": 0.46762803196907043, "learning_rate": 7.951086826944005e-05, "loss": 2.9354, "step": 4076 }, { "epoch": 0.33434793748398667, "grad_norm": 0.4496844708919525, "learning_rate": 7.948897705020293e-05, "loss": 2.9077, "step": 4078 }, { "epoch": 0.3345119139123751, "grad_norm": 0.46020054817199707, "learning_rate": 7.946707715980143e-05, "loss": 2.9189, "step": 4080 }, { "epoch": 0.3346758903407635, "grad_norm": 0.48112010955810547, "learning_rate": 7.944516860467518e-05, "loss": 2.9503, "step": 4082 }, { "epoch": 0.33483986676915195, "grad_norm": 0.4764317274093628, "learning_rate": 7.94232513912663e-05, "loss": 2.8818, "step": 4084 }, { "epoch": 0.33500384319754034, "grad_norm": 0.5369095802307129, "learning_rate": 7.940132552601949e-05, "loss": 2.9383, "step": 4086 }, { "epoch": 0.3351678196259288, "grad_norm": 0.5077469944953918, "learning_rate": 7.937939101538203e-05, "loss": 2.9377, "step": 4088 }, { "epoch": 0.3353317960543172, "grad_norm": 0.570915937423706, "learning_rate": 7.935744786580371e-05, "loss": 2.9779, "step": 4090 }, { "epoch": 0.3354957724827056, "grad_norm": 0.4736226499080658, "learning_rate": 7.933549608373683e-05, "loss": 2.8925, "step": 4092 }, { "epoch": 0.335659748911094, "grad_norm": 0.4973873496055603, "learning_rate": 7.931353567563631e-05, "loss": 2.9277, "step": 4094 }, { "epoch": 0.33582372533948246, "grad_norm": 0.511052668094635, "learning_rate": 7.92915666479595e-05, "loss": 2.8972, "step": 4096 }, { "epoch": 0.33598770176787085, "grad_norm": 0.49706801772117615, "learning_rate": 7.926958900716637e-05, "loss": 2.9035, "step": 4098 }, { "epoch": 0.3361516781962593, "grad_norm": 0.5151313543319702, "learning_rate": 7.92476027597194e-05, "loss": 2.9174, "step": 4100 }, { "epoch": 0.3363156546246477, "grad_norm": 0.5134227871894836, "learning_rate": 7.922560791208357e-05, "loss": 2.9505, "step": 4102 }, { "epoch": 0.33647963105303613, "grad_norm": 0.4975057542324066, "learning_rate": 7.920360447072645e-05, "loss": 2.9491, "step": 4104 }, { "epoch": 0.3366436074814245, "grad_norm": 0.5015755295753479, "learning_rate": 7.918159244211807e-05, "loss": 2.9242, "step": 4106 }, { "epoch": 0.33680758390981297, "grad_norm": 0.5908811688423157, "learning_rate": 7.915957183273102e-05, "loss": 2.9008, "step": 4108 }, { "epoch": 0.33697156033820136, "grad_norm": 0.5025661587715149, "learning_rate": 7.91375426490404e-05, "loss": 2.9373, "step": 4110 }, { "epoch": 0.3371355367665898, "grad_norm": 0.5269520282745361, "learning_rate": 7.911550489752389e-05, "loss": 2.9817, "step": 4112 }, { "epoch": 0.3372995131949782, "grad_norm": 0.5089029669761658, "learning_rate": 7.909345858466157e-05, "loss": 2.9172, "step": 4114 }, { "epoch": 0.33746348962336664, "grad_norm": 0.49392855167388916, "learning_rate": 7.907140371693616e-05, "loss": 2.8958, "step": 4116 }, { "epoch": 0.3376274660517551, "grad_norm": 0.4965139329433441, "learning_rate": 7.904934030083281e-05, "loss": 2.9154, "step": 4118 }, { "epoch": 0.3377914424801435, "grad_norm": 0.4956895411014557, "learning_rate": 7.902726834283923e-05, "loss": 2.9441, "step": 4120 }, { "epoch": 0.3379554189085319, "grad_norm": 0.46426016092300415, "learning_rate": 7.90051878494456e-05, "loss": 2.9203, "step": 4122 }, { "epoch": 0.3381193953369203, "grad_norm": 0.47019776701927185, "learning_rate": 7.898309882714468e-05, "loss": 2.9324, "step": 4124 }, { "epoch": 0.33828337176530876, "grad_norm": 0.43719297647476196, "learning_rate": 7.896100128243169e-05, "loss": 2.9194, "step": 4126 }, { "epoch": 0.33844734819369715, "grad_norm": 0.4638632535934448, "learning_rate": 7.893889522180432e-05, "loss": 2.9208, "step": 4128 }, { "epoch": 0.3386113246220856, "grad_norm": 0.46020522713661194, "learning_rate": 7.891678065176284e-05, "loss": 2.9386, "step": 4130 }, { "epoch": 0.338775301050474, "grad_norm": 0.5725560784339905, "learning_rate": 7.889465757880999e-05, "loss": 2.8924, "step": 4132 }, { "epoch": 0.33893927747886243, "grad_norm": 0.5009239912033081, "learning_rate": 7.887252600945096e-05, "loss": 2.9583, "step": 4134 }, { "epoch": 0.3391032539072508, "grad_norm": 0.5835885405540466, "learning_rate": 7.885038595019356e-05, "loss": 2.9996, "step": 4136 }, { "epoch": 0.33926723033563927, "grad_norm": 0.5218272805213928, "learning_rate": 7.882823740754796e-05, "loss": 2.9723, "step": 4138 }, { "epoch": 0.33943120676402766, "grad_norm": 0.48495087027549744, "learning_rate": 7.880608038802694e-05, "loss": 2.9011, "step": 4140 }, { "epoch": 0.3395951831924161, "grad_norm": 0.49771589040756226, "learning_rate": 7.878391489814567e-05, "loss": 2.9282, "step": 4142 }, { "epoch": 0.3397591596208045, "grad_norm": 0.49048569798469543, "learning_rate": 7.876174094442189e-05, "loss": 2.9254, "step": 4144 }, { "epoch": 0.33992313604919294, "grad_norm": 0.5105252861976624, "learning_rate": 7.873955853337578e-05, "loss": 2.9522, "step": 4146 }, { "epoch": 0.34008711247758133, "grad_norm": 0.4772528111934662, "learning_rate": 7.871736767153005e-05, "loss": 2.9794, "step": 4148 }, { "epoch": 0.3402510889059698, "grad_norm": 0.4797604978084564, "learning_rate": 7.869516836540985e-05, "loss": 2.9455, "step": 4150 }, { "epoch": 0.34041506533435817, "grad_norm": 0.4566085934638977, "learning_rate": 7.867296062154284e-05, "loss": 2.9481, "step": 4152 }, { "epoch": 0.3405790417627466, "grad_norm": 0.5133824944496155, "learning_rate": 7.865074444645916e-05, "loss": 2.9179, "step": 4154 }, { "epoch": 0.340743018191135, "grad_norm": 0.475188285112381, "learning_rate": 7.862851984669142e-05, "loss": 2.8836, "step": 4156 }, { "epoch": 0.34090699461952345, "grad_norm": 0.4517628848552704, "learning_rate": 7.86062868287747e-05, "loss": 2.8837, "step": 4158 }, { "epoch": 0.34107097104791184, "grad_norm": 0.44536206126213074, "learning_rate": 7.85840453992466e-05, "loss": 2.9381, "step": 4160 }, { "epoch": 0.3412349474763003, "grad_norm": 0.5397975444793701, "learning_rate": 7.856179556464711e-05, "loss": 2.9107, "step": 4162 }, { "epoch": 0.3413989239046887, "grad_norm": 0.4633306562900543, "learning_rate": 7.853953733151877e-05, "loss": 2.9345, "step": 4164 }, { "epoch": 0.3415629003330771, "grad_norm": 0.4207089841365814, "learning_rate": 7.851727070640658e-05, "loss": 2.9393, "step": 4166 }, { "epoch": 0.3417268767614655, "grad_norm": 0.46550941467285156, "learning_rate": 7.849499569585797e-05, "loss": 2.9011, "step": 4168 }, { "epoch": 0.34189085318985396, "grad_norm": 0.4900607764720917, "learning_rate": 7.847271230642283e-05, "loss": 2.9214, "step": 4170 }, { "epoch": 0.34205482961824235, "grad_norm": 0.45382747054100037, "learning_rate": 7.84504205446536e-05, "loss": 2.9706, "step": 4172 }, { "epoch": 0.3422188060466308, "grad_norm": 0.4561139941215515, "learning_rate": 7.842812041710505e-05, "loss": 2.9733, "step": 4174 }, { "epoch": 0.34238278247501924, "grad_norm": 0.438036173582077, "learning_rate": 7.840581193033452e-05, "loss": 2.9325, "step": 4176 }, { "epoch": 0.34254675890340763, "grad_norm": 0.4495597183704376, "learning_rate": 7.838349509090177e-05, "loss": 2.856, "step": 4178 }, { "epoch": 0.3427107353317961, "grad_norm": 0.44860947132110596, "learning_rate": 7.8361169905369e-05, "loss": 2.8807, "step": 4180 }, { "epoch": 0.34287471176018447, "grad_norm": 0.43089696764945984, "learning_rate": 7.833883638030087e-05, "loss": 2.8998, "step": 4182 }, { "epoch": 0.3430386881885729, "grad_norm": 0.42991116642951965, "learning_rate": 7.831649452226453e-05, "loss": 2.925, "step": 4184 }, { "epoch": 0.3432026646169613, "grad_norm": 0.4584693908691406, "learning_rate": 7.829414433782951e-05, "loss": 2.9263, "step": 4186 }, { "epoch": 0.34336664104534975, "grad_norm": 0.43747174739837646, "learning_rate": 7.827178583356786e-05, "loss": 2.9813, "step": 4188 }, { "epoch": 0.34353061747373814, "grad_norm": 0.44275543093681335, "learning_rate": 7.824941901605407e-05, "loss": 2.9485, "step": 4190 }, { "epoch": 0.3436945939021266, "grad_norm": 0.4621785879135132, "learning_rate": 7.822704389186499e-05, "loss": 2.9154, "step": 4192 }, { "epoch": 0.343858570330515, "grad_norm": 0.4807699918746948, "learning_rate": 7.820466046758001e-05, "loss": 2.939, "step": 4194 }, { "epoch": 0.3440225467589034, "grad_norm": 0.5348572134971619, "learning_rate": 7.818226874978092e-05, "loss": 2.9083, "step": 4196 }, { "epoch": 0.3441865231872918, "grad_norm": 0.4638359844684601, "learning_rate": 7.815986874505195e-05, "loss": 2.951, "step": 4198 }, { "epoch": 0.34435049961568026, "grad_norm": 0.49754995107650757, "learning_rate": 7.813746045997974e-05, "loss": 2.9631, "step": 4200 }, { "epoch": 0.34451447604406865, "grad_norm": 0.44452568888664246, "learning_rate": 7.811504390115344e-05, "loss": 2.9216, "step": 4202 }, { "epoch": 0.3446784524724571, "grad_norm": 0.48824217915534973, "learning_rate": 7.809261907516457e-05, "loss": 2.8855, "step": 4204 }, { "epoch": 0.3448424289008455, "grad_norm": 0.4847983717918396, "learning_rate": 7.807018598860709e-05, "loss": 2.9606, "step": 4206 }, { "epoch": 0.34500640532923393, "grad_norm": 0.4555025100708008, "learning_rate": 7.80477446480774e-05, "loss": 2.9227, "step": 4208 }, { "epoch": 0.3451703817576223, "grad_norm": 0.43306758999824524, "learning_rate": 7.802529506017432e-05, "loss": 2.9116, "step": 4210 }, { "epoch": 0.34533435818601077, "grad_norm": 0.47224709391593933, "learning_rate": 7.80028372314991e-05, "loss": 2.9097, "step": 4212 }, { "epoch": 0.34549833461439916, "grad_norm": 0.6299461126327515, "learning_rate": 7.798037116865542e-05, "loss": 2.9453, "step": 4214 }, { "epoch": 0.3456623110427876, "grad_norm": 0.4493595361709595, "learning_rate": 7.795789687824936e-05, "loss": 2.8246, "step": 4216 }, { "epoch": 0.345826287471176, "grad_norm": 0.4576161205768585, "learning_rate": 7.793541436688943e-05, "loss": 2.9754, "step": 4218 }, { "epoch": 0.34599026389956444, "grad_norm": 0.44774124026298523, "learning_rate": 7.791292364118659e-05, "loss": 2.9545, "step": 4220 }, { "epoch": 0.34615424032795283, "grad_norm": 0.48785826563835144, "learning_rate": 7.789042470775414e-05, "loss": 2.9267, "step": 4222 }, { "epoch": 0.3463182167563413, "grad_norm": 0.4675656855106354, "learning_rate": 7.786791757320788e-05, "loss": 2.9094, "step": 4224 }, { "epoch": 0.34648219318472967, "grad_norm": 0.4874213635921478, "learning_rate": 7.784540224416594e-05, "loss": 2.888, "step": 4226 }, { "epoch": 0.3466461696131181, "grad_norm": 0.43593478202819824, "learning_rate": 7.782287872724895e-05, "loss": 2.9491, "step": 4228 }, { "epoch": 0.3468101460415065, "grad_norm": 0.43017590045928955, "learning_rate": 7.780034702907985e-05, "loss": 2.9125, "step": 4230 }, { "epoch": 0.34697412246989495, "grad_norm": 0.41226688027381897, "learning_rate": 7.777780715628406e-05, "loss": 2.8866, "step": 4232 }, { "epoch": 0.3471380988982834, "grad_norm": 0.44163820147514343, "learning_rate": 7.775525911548935e-05, "loss": 2.9314, "step": 4234 }, { "epoch": 0.3473020753266718, "grad_norm": 0.48321813344955444, "learning_rate": 7.773270291332595e-05, "loss": 2.9164, "step": 4236 }, { "epoch": 0.34746605175506023, "grad_norm": 0.46925100684165955, "learning_rate": 7.771013855642646e-05, "loss": 2.9603, "step": 4238 }, { "epoch": 0.3476300281834486, "grad_norm": 0.49697205424308777, "learning_rate": 7.768756605142584e-05, "loss": 2.9384, "step": 4240 }, { "epoch": 0.34779400461183707, "grad_norm": 0.5071016550064087, "learning_rate": 7.76649854049615e-05, "loss": 2.9333, "step": 4242 }, { "epoch": 0.34795798104022546, "grad_norm": 0.49894312024116516, "learning_rate": 7.764239662367324e-05, "loss": 2.9134, "step": 4244 }, { "epoch": 0.3481219574686139, "grad_norm": 0.5091718435287476, "learning_rate": 7.761979971420323e-05, "loss": 2.9094, "step": 4246 }, { "epoch": 0.3482859338970023, "grad_norm": 0.46571823954582214, "learning_rate": 7.759719468319602e-05, "loss": 2.9714, "step": 4248 }, { "epoch": 0.34844991032539074, "grad_norm": 0.47561269998550415, "learning_rate": 7.75745815372986e-05, "loss": 2.9784, "step": 4250 }, { "epoch": 0.34861388675377913, "grad_norm": 0.5359566807746887, "learning_rate": 7.755196028316027e-05, "loss": 2.9486, "step": 4252 }, { "epoch": 0.3487778631821676, "grad_norm": 0.5660485625267029, "learning_rate": 7.752933092743279e-05, "loss": 2.9364, "step": 4254 }, { "epoch": 0.34894183961055597, "grad_norm": 0.4833694100379944, "learning_rate": 7.750669347677027e-05, "loss": 2.9231, "step": 4256 }, { "epoch": 0.3491058160389444, "grad_norm": 0.509084939956665, "learning_rate": 7.748404793782917e-05, "loss": 2.9245, "step": 4258 }, { "epoch": 0.3492697924673328, "grad_norm": 0.49266326427459717, "learning_rate": 7.74613943172684e-05, "loss": 2.9482, "step": 4260 }, { "epoch": 0.34943376889572125, "grad_norm": 0.4660591781139374, "learning_rate": 7.743873262174917e-05, "loss": 2.9291, "step": 4262 }, { "epoch": 0.34959774532410964, "grad_norm": 0.4857702851295471, "learning_rate": 7.74160628579351e-05, "loss": 2.9203, "step": 4264 }, { "epoch": 0.3497617217524981, "grad_norm": 0.4842332899570465, "learning_rate": 7.739338503249219e-05, "loss": 2.948, "step": 4266 }, { "epoch": 0.3499256981808865, "grad_norm": 0.5411705374717712, "learning_rate": 7.737069915208882e-05, "loss": 2.9183, "step": 4268 }, { "epoch": 0.3500896746092749, "grad_norm": 0.5409607291221619, "learning_rate": 7.734800522339566e-05, "loss": 2.8847, "step": 4270 }, { "epoch": 0.3502536510376633, "grad_norm": 0.43880927562713623, "learning_rate": 7.732530325308587e-05, "loss": 2.9398, "step": 4272 }, { "epoch": 0.35041762746605176, "grad_norm": 0.44448137283325195, "learning_rate": 7.730259324783489e-05, "loss": 2.9228, "step": 4274 }, { "epoch": 0.35058160389444015, "grad_norm": 0.5357170104980469, "learning_rate": 7.727987521432054e-05, "loss": 2.9275, "step": 4276 }, { "epoch": 0.3507455803228286, "grad_norm": 0.48424386978149414, "learning_rate": 7.725714915922299e-05, "loss": 2.8905, "step": 4278 }, { "epoch": 0.350909556751217, "grad_norm": 0.4209941625595093, "learning_rate": 7.72344150892248e-05, "loss": 2.8945, "step": 4280 }, { "epoch": 0.35107353317960543, "grad_norm": 0.42999011278152466, "learning_rate": 7.721167301101088e-05, "loss": 2.8828, "step": 4282 }, { "epoch": 0.3512375096079938, "grad_norm": 0.4773045480251312, "learning_rate": 7.718892293126847e-05, "loss": 2.9068, "step": 4284 }, { "epoch": 0.35140148603638227, "grad_norm": 0.45867007970809937, "learning_rate": 7.716616485668718e-05, "loss": 2.9198, "step": 4286 }, { "epoch": 0.35156546246477066, "grad_norm": 0.47990646958351135, "learning_rate": 7.714339879395897e-05, "loss": 2.8771, "step": 4288 }, { "epoch": 0.3517294388931591, "grad_norm": 0.5104199051856995, "learning_rate": 7.712062474977815e-05, "loss": 2.9214, "step": 4290 }, { "epoch": 0.35189341532154755, "grad_norm": 0.5507774353027344, "learning_rate": 7.709784273084137e-05, "loss": 2.9568, "step": 4292 }, { "epoch": 0.35205739174993594, "grad_norm": 0.5063462257385254, "learning_rate": 7.707505274384761e-05, "loss": 2.8618, "step": 4294 }, { "epoch": 0.3522213681783244, "grad_norm": 0.5065426826477051, "learning_rate": 7.705225479549825e-05, "loss": 2.9249, "step": 4296 }, { "epoch": 0.3523853446067128, "grad_norm": 0.47254642844200134, "learning_rate": 7.702944889249694e-05, "loss": 2.9213, "step": 4298 }, { "epoch": 0.3525493210351012, "grad_norm": 0.5116934776306152, "learning_rate": 7.700663504154973e-05, "loss": 2.9124, "step": 4300 }, { "epoch": 0.3527132974634896, "grad_norm": 0.4968010187149048, "learning_rate": 7.698381324936496e-05, "loss": 2.8896, "step": 4302 }, { "epoch": 0.35287727389187806, "grad_norm": 0.5114293694496155, "learning_rate": 7.696098352265334e-05, "loss": 2.9226, "step": 4304 }, { "epoch": 0.35304125032026645, "grad_norm": 0.46992501616477966, "learning_rate": 7.693814586812788e-05, "loss": 2.8989, "step": 4306 }, { "epoch": 0.3532052267486549, "grad_norm": 0.43957480788230896, "learning_rate": 7.691530029250393e-05, "loss": 2.9235, "step": 4308 }, { "epoch": 0.3533692031770433, "grad_norm": 0.4991060793399811, "learning_rate": 7.689244680249922e-05, "loss": 2.9177, "step": 4310 }, { "epoch": 0.35353317960543174, "grad_norm": 0.5018460154533386, "learning_rate": 7.686958540483372e-05, "loss": 2.9189, "step": 4312 }, { "epoch": 0.3536971560338201, "grad_norm": 0.525246798992157, "learning_rate": 7.68467161062298e-05, "loss": 2.893, "step": 4314 }, { "epoch": 0.35386113246220857, "grad_norm": 0.49975743889808655, "learning_rate": 7.682383891341212e-05, "loss": 2.9423, "step": 4316 }, { "epoch": 0.35402510889059696, "grad_norm": 0.4797281324863434, "learning_rate": 7.680095383310764e-05, "loss": 2.925, "step": 4318 }, { "epoch": 0.3541890853189854, "grad_norm": 0.5019105672836304, "learning_rate": 7.677806087204567e-05, "loss": 2.9299, "step": 4320 }, { "epoch": 0.3543530617473738, "grad_norm": 0.4949765205383301, "learning_rate": 7.675516003695787e-05, "loss": 2.9248, "step": 4322 }, { "epoch": 0.35451703817576224, "grad_norm": 0.46523308753967285, "learning_rate": 7.673225133457815e-05, "loss": 2.8888, "step": 4324 }, { "epoch": 0.35468101460415064, "grad_norm": 0.4424777328968048, "learning_rate": 7.670933477164275e-05, "loss": 2.9019, "step": 4326 }, { "epoch": 0.3548449910325391, "grad_norm": 0.43407967686653137, "learning_rate": 7.668641035489024e-05, "loss": 2.8801, "step": 4328 }, { "epoch": 0.35500896746092747, "grad_norm": 0.48617562651634216, "learning_rate": 7.666347809106149e-05, "loss": 2.9603, "step": 4330 }, { "epoch": 0.3551729438893159, "grad_norm": 0.441741406917572, "learning_rate": 7.664053798689968e-05, "loss": 2.8982, "step": 4332 }, { "epoch": 0.3553369203177043, "grad_norm": 0.4365949034690857, "learning_rate": 7.66175900491503e-05, "loss": 2.9343, "step": 4334 }, { "epoch": 0.35550089674609275, "grad_norm": 0.47714248299598694, "learning_rate": 7.659463428456113e-05, "loss": 2.9008, "step": 4336 }, { "epoch": 0.35566487317448114, "grad_norm": 0.4637061357498169, "learning_rate": 7.657167069988225e-05, "loss": 2.8479, "step": 4338 }, { "epoch": 0.3558288496028696, "grad_norm": 0.48176297545433044, "learning_rate": 7.654869930186607e-05, "loss": 2.8835, "step": 4340 }, { "epoch": 0.355992826031258, "grad_norm": 0.4734376072883606, "learning_rate": 7.652572009726726e-05, "loss": 2.9718, "step": 4342 }, { "epoch": 0.3561568024596464, "grad_norm": 0.4756949245929718, "learning_rate": 7.650273309284281e-05, "loss": 2.8875, "step": 4344 }, { "epoch": 0.3563207788880349, "grad_norm": 0.4926952123641968, "learning_rate": 7.647973829535199e-05, "loss": 2.9093, "step": 4346 }, { "epoch": 0.35648475531642326, "grad_norm": 0.4380865693092346, "learning_rate": 7.645673571155636e-05, "loss": 2.9251, "step": 4348 }, { "epoch": 0.3566487317448117, "grad_norm": 0.45024943351745605, "learning_rate": 7.643372534821978e-05, "loss": 2.9649, "step": 4350 }, { "epoch": 0.3568127081732001, "grad_norm": 0.4537756145000458, "learning_rate": 7.64107072121084e-05, "loss": 2.9046, "step": 4352 }, { "epoch": 0.35697668460158855, "grad_norm": 0.4084469676017761, "learning_rate": 7.638768130999063e-05, "loss": 2.8553, "step": 4354 }, { "epoch": 0.35714066102997694, "grad_norm": 0.4993169605731964, "learning_rate": 7.636464764863721e-05, "loss": 2.8792, "step": 4356 }, { "epoch": 0.3573046374583654, "grad_norm": 0.4630793333053589, "learning_rate": 7.634160623482111e-05, "loss": 2.8703, "step": 4358 }, { "epoch": 0.35746861388675377, "grad_norm": 0.43812108039855957, "learning_rate": 7.631855707531762e-05, "loss": 2.8906, "step": 4360 }, { "epoch": 0.3576325903151422, "grad_norm": 0.45748457312583923, "learning_rate": 7.629550017690428e-05, "loss": 2.916, "step": 4362 }, { "epoch": 0.3577965667435306, "grad_norm": 0.4260389804840088, "learning_rate": 7.627243554636092e-05, "loss": 2.9324, "step": 4364 }, { "epoch": 0.35796054317191905, "grad_norm": 0.4282701313495636, "learning_rate": 7.624936319046965e-05, "loss": 2.9126, "step": 4366 }, { "epoch": 0.35812451960030744, "grad_norm": 0.4490812420845032, "learning_rate": 7.622628311601482e-05, "loss": 2.9562, "step": 4368 }, { "epoch": 0.3582884960286959, "grad_norm": 0.47239381074905396, "learning_rate": 7.620319532978311e-05, "loss": 2.8903, "step": 4370 }, { "epoch": 0.3584524724570843, "grad_norm": 0.44792598485946655, "learning_rate": 7.618009983856338e-05, "loss": 2.9105, "step": 4372 }, { "epoch": 0.3586164488854727, "grad_norm": 0.44292473793029785, "learning_rate": 7.615699664914685e-05, "loss": 2.8907, "step": 4374 }, { "epoch": 0.3587804253138611, "grad_norm": 0.4768601059913635, "learning_rate": 7.613388576832692e-05, "loss": 2.8963, "step": 4376 }, { "epoch": 0.35894440174224956, "grad_norm": 0.45533299446105957, "learning_rate": 7.61107672028993e-05, "loss": 2.8683, "step": 4378 }, { "epoch": 0.35910837817063795, "grad_norm": 0.48470351099967957, "learning_rate": 7.608764095966197e-05, "loss": 2.9618, "step": 4380 }, { "epoch": 0.3592723545990264, "grad_norm": 0.49000081419944763, "learning_rate": 7.606450704541514e-05, "loss": 2.9211, "step": 4382 }, { "epoch": 0.3594363310274148, "grad_norm": 0.536358654499054, "learning_rate": 7.604136546696127e-05, "loss": 2.9231, "step": 4384 }, { "epoch": 0.35960030745580324, "grad_norm": 0.509800136089325, "learning_rate": 7.60182162311051e-05, "loss": 2.8607, "step": 4386 }, { "epoch": 0.3597642838841916, "grad_norm": 0.47165894508361816, "learning_rate": 7.59950593446536e-05, "loss": 2.9478, "step": 4388 }, { "epoch": 0.3599282603125801, "grad_norm": 0.48365214467048645, "learning_rate": 7.5971894814416e-05, "loss": 2.8759, "step": 4390 }, { "epoch": 0.36009223674096846, "grad_norm": 0.507278323173523, "learning_rate": 7.594872264720378e-05, "loss": 2.9494, "step": 4392 }, { "epoch": 0.3602562131693569, "grad_norm": 0.5131365060806274, "learning_rate": 7.592554284983067e-05, "loss": 2.8763, "step": 4394 }, { "epoch": 0.3604201895977453, "grad_norm": 0.44358497858047485, "learning_rate": 7.590235542911262e-05, "loss": 2.8689, "step": 4396 }, { "epoch": 0.36058416602613375, "grad_norm": 0.45631396770477295, "learning_rate": 7.587916039186782e-05, "loss": 2.9157, "step": 4398 }, { "epoch": 0.36074814245452214, "grad_norm": 0.4669037461280823, "learning_rate": 7.585595774491675e-05, "loss": 2.888, "step": 4400 }, { "epoch": 0.3609121188829106, "grad_norm": 0.479308158159256, "learning_rate": 7.58327474950821e-05, "loss": 2.9263, "step": 4402 }, { "epoch": 0.36107609531129903, "grad_norm": 0.44483739137649536, "learning_rate": 7.580952964918873e-05, "loss": 2.9215, "step": 4404 }, { "epoch": 0.3612400717396874, "grad_norm": 0.47619539499282837, "learning_rate": 7.578630421406385e-05, "loss": 2.9243, "step": 4406 }, { "epoch": 0.36140404816807586, "grad_norm": 0.45321038365364075, "learning_rate": 7.576307119653682e-05, "loss": 2.9044, "step": 4408 }, { "epoch": 0.36156802459646425, "grad_norm": 0.48409467935562134, "learning_rate": 7.573983060343927e-05, "loss": 2.9526, "step": 4410 }, { "epoch": 0.3617320010248527, "grad_norm": 0.474016934633255, "learning_rate": 7.571658244160504e-05, "loss": 2.8757, "step": 4412 }, { "epoch": 0.3618959774532411, "grad_norm": 0.4701443016529083, "learning_rate": 7.569332671787019e-05, "loss": 2.897, "step": 4414 }, { "epoch": 0.36205995388162954, "grad_norm": 0.4822806119918823, "learning_rate": 7.567006343907302e-05, "loss": 2.8578, "step": 4416 }, { "epoch": 0.3622239303100179, "grad_norm": 0.47614964842796326, "learning_rate": 7.564679261205401e-05, "loss": 2.9121, "step": 4418 }, { "epoch": 0.3623879067384064, "grad_norm": 0.42333364486694336, "learning_rate": 7.562351424365592e-05, "loss": 2.8316, "step": 4420 }, { "epoch": 0.36255188316679476, "grad_norm": 0.39652881026268005, "learning_rate": 7.56002283407237e-05, "loss": 2.8848, "step": 4422 }, { "epoch": 0.3627158595951832, "grad_norm": 0.44348645210266113, "learning_rate": 7.55769349101045e-05, "loss": 2.8826, "step": 4424 }, { "epoch": 0.3628798360235716, "grad_norm": 0.44164928793907166, "learning_rate": 7.555363395864773e-05, "loss": 2.9068, "step": 4426 }, { "epoch": 0.36304381245196005, "grad_norm": 0.4629053771495819, "learning_rate": 7.553032549320494e-05, "loss": 2.9264, "step": 4428 }, { "epoch": 0.36320778888034844, "grad_norm": 0.49411946535110474, "learning_rate": 7.550700952062995e-05, "loss": 2.9582, "step": 4430 }, { "epoch": 0.3633717653087369, "grad_norm": 0.5183814167976379, "learning_rate": 7.548368604777878e-05, "loss": 2.9351, "step": 4432 }, { "epoch": 0.3635357417371253, "grad_norm": 0.4873194694519043, "learning_rate": 7.546035508150962e-05, "loss": 2.9183, "step": 4434 }, { "epoch": 0.3636997181655137, "grad_norm": 0.44624242186546326, "learning_rate": 7.543701662868288e-05, "loss": 2.9125, "step": 4436 }, { "epoch": 0.3638636945939021, "grad_norm": 0.45851930975914, "learning_rate": 7.541367069616121e-05, "loss": 2.8795, "step": 4438 }, { "epoch": 0.36402767102229056, "grad_norm": 0.4716365933418274, "learning_rate": 7.539031729080941e-05, "loss": 2.9401, "step": 4440 }, { "epoch": 0.36419164745067895, "grad_norm": 0.5059689283370972, "learning_rate": 7.536695641949447e-05, "loss": 2.9172, "step": 4442 }, { "epoch": 0.3643556238790674, "grad_norm": 0.5059446692466736, "learning_rate": 7.534358808908564e-05, "loss": 2.962, "step": 4444 }, { "epoch": 0.3645196003074558, "grad_norm": 0.4895912706851959, "learning_rate": 7.53202123064543e-05, "loss": 2.948, "step": 4446 }, { "epoch": 0.36468357673584423, "grad_norm": 0.4798704981803894, "learning_rate": 7.529682907847402e-05, "loss": 2.9029, "step": 4448 }, { "epoch": 0.3648475531642326, "grad_norm": 0.5090065002441406, "learning_rate": 7.527343841202064e-05, "loss": 2.9106, "step": 4450 }, { "epoch": 0.36501152959262106, "grad_norm": 0.49287864565849304, "learning_rate": 7.525004031397209e-05, "loss": 2.9063, "step": 4452 }, { "epoch": 0.36517550602100946, "grad_norm": 0.4472784101963043, "learning_rate": 7.522663479120854e-05, "loss": 2.9396, "step": 4454 }, { "epoch": 0.3653394824493979, "grad_norm": 0.45480141043663025, "learning_rate": 7.520322185061232e-05, "loss": 2.8923, "step": 4456 }, { "epoch": 0.3655034588777863, "grad_norm": 0.4318976402282715, "learning_rate": 7.517980149906795e-05, "loss": 2.9187, "step": 4458 }, { "epoch": 0.36566743530617474, "grad_norm": 0.42421650886535645, "learning_rate": 7.515637374346216e-05, "loss": 2.8494, "step": 4460 }, { "epoch": 0.3658314117345632, "grad_norm": 0.4657871127128601, "learning_rate": 7.513293859068378e-05, "loss": 2.8256, "step": 4462 }, { "epoch": 0.3659953881629516, "grad_norm": 0.4119051992893219, "learning_rate": 7.510949604762389e-05, "loss": 2.8893, "step": 4464 }, { "epoch": 0.36615936459134, "grad_norm": 0.44624143838882446, "learning_rate": 7.508604612117572e-05, "loss": 2.9587, "step": 4466 }, { "epoch": 0.3663233410197284, "grad_norm": 0.43493539094924927, "learning_rate": 7.506258881823463e-05, "loss": 2.907, "step": 4468 }, { "epoch": 0.36648731744811686, "grad_norm": 0.42813992500305176, "learning_rate": 7.503912414569821e-05, "loss": 2.8328, "step": 4470 }, { "epoch": 0.36665129387650525, "grad_norm": 0.4879220724105835, "learning_rate": 7.50156521104662e-05, "loss": 2.9389, "step": 4472 }, { "epoch": 0.3668152703048937, "grad_norm": 0.49016737937927246, "learning_rate": 7.499217271944049e-05, "loss": 2.9138, "step": 4474 }, { "epoch": 0.3669792467332821, "grad_norm": 0.4534437358379364, "learning_rate": 7.496868597952513e-05, "loss": 2.8611, "step": 4476 }, { "epoch": 0.36714322316167053, "grad_norm": 0.5044350624084473, "learning_rate": 7.494519189762634e-05, "loss": 2.8228, "step": 4478 }, { "epoch": 0.3673071995900589, "grad_norm": 0.44165295362472534, "learning_rate": 7.492169048065252e-05, "loss": 2.9049, "step": 4480 }, { "epoch": 0.36747117601844737, "grad_norm": 0.456106036901474, "learning_rate": 7.489818173551418e-05, "loss": 2.8331, "step": 4482 }, { "epoch": 0.36763515244683576, "grad_norm": 0.4963456392288208, "learning_rate": 7.487466566912405e-05, "loss": 2.965, "step": 4484 }, { "epoch": 0.3677991288752242, "grad_norm": 0.5101214647293091, "learning_rate": 7.485114228839693e-05, "loss": 2.9389, "step": 4486 }, { "epoch": 0.3679631053036126, "grad_norm": 0.5219040513038635, "learning_rate": 7.482761160024982e-05, "loss": 2.9613, "step": 4488 }, { "epoch": 0.36812708173200104, "grad_norm": 0.46619266271591187, "learning_rate": 7.480407361160189e-05, "loss": 2.9146, "step": 4490 }, { "epoch": 0.36829105816038943, "grad_norm": 0.44373613595962524, "learning_rate": 7.478052832937442e-05, "loss": 2.865, "step": 4492 }, { "epoch": 0.3684550345887779, "grad_norm": 0.450777530670166, "learning_rate": 7.475697576049083e-05, "loss": 2.8657, "step": 4494 }, { "epoch": 0.36861901101716626, "grad_norm": 0.5079871416091919, "learning_rate": 7.473341591187672e-05, "loss": 2.9215, "step": 4496 }, { "epoch": 0.3687829874455547, "grad_norm": 0.44126954674720764, "learning_rate": 7.47098487904598e-05, "loss": 2.9016, "step": 4498 }, { "epoch": 0.3689469638739431, "grad_norm": 0.4707218110561371, "learning_rate": 7.468627440316991e-05, "loss": 2.8649, "step": 4500 }, { "epoch": 0.36911094030233155, "grad_norm": 0.4811074137687683, "learning_rate": 7.466269275693906e-05, "loss": 2.8832, "step": 4502 }, { "epoch": 0.36927491673071994, "grad_norm": 0.5184465050697327, "learning_rate": 7.46391038587014e-05, "loss": 2.8904, "step": 4504 }, { "epoch": 0.3694388931591084, "grad_norm": 0.5476818680763245, "learning_rate": 7.461550771539314e-05, "loss": 2.8888, "step": 4506 }, { "epoch": 0.3696028695874968, "grad_norm": 0.4976902902126312, "learning_rate": 7.459190433395271e-05, "loss": 2.892, "step": 4508 }, { "epoch": 0.3697668460158852, "grad_norm": 0.48264408111572266, "learning_rate": 7.456829372132062e-05, "loss": 2.8924, "step": 4510 }, { "epoch": 0.3699308224442736, "grad_norm": 0.4939655065536499, "learning_rate": 7.454467588443949e-05, "loss": 2.8877, "step": 4512 }, { "epoch": 0.37009479887266206, "grad_norm": 0.5026797652244568, "learning_rate": 7.452105083025411e-05, "loss": 2.9223, "step": 4514 }, { "epoch": 0.37025877530105045, "grad_norm": 0.48400169610977173, "learning_rate": 7.449741856571138e-05, "loss": 2.9129, "step": 4516 }, { "epoch": 0.3704227517294389, "grad_norm": 0.49504354596138, "learning_rate": 7.44737790977603e-05, "loss": 2.9228, "step": 4518 }, { "epoch": 0.37058672815782734, "grad_norm": 0.4225211441516876, "learning_rate": 7.4450132433352e-05, "loss": 2.9272, "step": 4520 }, { "epoch": 0.37075070458621573, "grad_norm": 0.4583554267883301, "learning_rate": 7.442647857943973e-05, "loss": 2.9052, "step": 4522 }, { "epoch": 0.3709146810146042, "grad_norm": 0.46609818935394287, "learning_rate": 7.440281754297884e-05, "loss": 2.8505, "step": 4524 }, { "epoch": 0.37107865744299257, "grad_norm": 0.4706270396709442, "learning_rate": 7.437914933092683e-05, "loss": 2.8684, "step": 4526 }, { "epoch": 0.371242633871381, "grad_norm": 0.4370834231376648, "learning_rate": 7.435547395024324e-05, "loss": 2.9258, "step": 4528 }, { "epoch": 0.3714066102997694, "grad_norm": 0.43355029821395874, "learning_rate": 7.43317914078898e-05, "loss": 2.8883, "step": 4530 }, { "epoch": 0.37157058672815785, "grad_norm": 0.45329612493515015, "learning_rate": 7.430810171083028e-05, "loss": 2.9181, "step": 4532 }, { "epoch": 0.37173456315654624, "grad_norm": 0.4789738357067108, "learning_rate": 7.42844048660306e-05, "loss": 2.9491, "step": 4534 }, { "epoch": 0.3718985395849347, "grad_norm": 0.49160686135292053, "learning_rate": 7.426070088045873e-05, "loss": 2.9147, "step": 4536 }, { "epoch": 0.3720625160133231, "grad_norm": 0.4814167320728302, "learning_rate": 7.42369897610848e-05, "loss": 2.8687, "step": 4538 }, { "epoch": 0.3722264924417115, "grad_norm": 0.4552319645881653, "learning_rate": 7.421327151488102e-05, "loss": 2.9497, "step": 4540 }, { "epoch": 0.3723904688700999, "grad_norm": 0.4466070234775543, "learning_rate": 7.418954614882165e-05, "loss": 2.8809, "step": 4542 }, { "epoch": 0.37255444529848836, "grad_norm": 0.44367876648902893, "learning_rate": 7.416581366988309e-05, "loss": 2.8521, "step": 4544 }, { "epoch": 0.37271842172687675, "grad_norm": 0.514403223991394, "learning_rate": 7.414207408504383e-05, "loss": 2.8818, "step": 4546 }, { "epoch": 0.3728823981552652, "grad_norm": 0.45354655385017395, "learning_rate": 7.411832740128441e-05, "loss": 2.8834, "step": 4548 }, { "epoch": 0.3730463745836536, "grad_norm": 0.5105783939361572, "learning_rate": 7.409457362558753e-05, "loss": 2.8796, "step": 4550 }, { "epoch": 0.37321035101204203, "grad_norm": 0.5413146018981934, "learning_rate": 7.40708127649379e-05, "loss": 2.8757, "step": 4552 }, { "epoch": 0.3733743274404304, "grad_norm": 0.4836574196815491, "learning_rate": 7.404704482632236e-05, "loss": 2.9652, "step": 4554 }, { "epoch": 0.37353830386881887, "grad_norm": 0.4397220015525818, "learning_rate": 7.402326981672982e-05, "loss": 2.8942, "step": 4556 }, { "epoch": 0.37370228029720726, "grad_norm": 0.4230784773826599, "learning_rate": 7.399948774315125e-05, "loss": 2.909, "step": 4558 }, { "epoch": 0.3738662567255957, "grad_norm": 0.4401587247848511, "learning_rate": 7.397569861257973e-05, "loss": 2.8884, "step": 4560 }, { "epoch": 0.3740302331539841, "grad_norm": 0.4340316653251648, "learning_rate": 7.395190243201037e-05, "loss": 2.8603, "step": 4562 }, { "epoch": 0.37419420958237254, "grad_norm": 0.5143851637840271, "learning_rate": 7.39280992084404e-05, "loss": 2.9539, "step": 4564 }, { "epoch": 0.37435818601076093, "grad_norm": 0.4426273703575134, "learning_rate": 7.390428894886912e-05, "loss": 2.8304, "step": 4566 }, { "epoch": 0.3745221624391494, "grad_norm": 0.4303826093673706, "learning_rate": 7.388047166029783e-05, "loss": 2.9216, "step": 4568 }, { "epoch": 0.37468613886753777, "grad_norm": 0.4478808641433716, "learning_rate": 7.385664734973e-05, "loss": 2.9159, "step": 4570 }, { "epoch": 0.3748501152959262, "grad_norm": 0.46657073497772217, "learning_rate": 7.383281602417111e-05, "loss": 2.8904, "step": 4572 }, { "epoch": 0.3750140917243146, "grad_norm": 0.48242586851119995, "learning_rate": 7.380897769062866e-05, "loss": 2.9341, "step": 4574 }, { "epoch": 0.37517806815270305, "grad_norm": 0.4930760860443115, "learning_rate": 7.37851323561123e-05, "loss": 2.9213, "step": 4576 }, { "epoch": 0.3753420445810915, "grad_norm": 0.4156397581100464, "learning_rate": 7.376128002763368e-05, "loss": 2.9469, "step": 4578 }, { "epoch": 0.3755060210094799, "grad_norm": 0.4514547884464264, "learning_rate": 7.37374207122065e-05, "loss": 2.8257, "step": 4580 }, { "epoch": 0.37566999743786833, "grad_norm": 0.5136944055557251, "learning_rate": 7.371355441684657e-05, "loss": 2.861, "step": 4582 }, { "epoch": 0.3758339738662567, "grad_norm": 0.45226147770881653, "learning_rate": 7.368968114857172e-05, "loss": 2.9101, "step": 4584 }, { "epoch": 0.37599795029464517, "grad_norm": 0.4473552405834198, "learning_rate": 7.366580091440177e-05, "loss": 2.8764, "step": 4586 }, { "epoch": 0.37616192672303356, "grad_norm": 0.450043648481369, "learning_rate": 7.364191372135872e-05, "loss": 2.8991, "step": 4588 }, { "epoch": 0.376325903151422, "grad_norm": 0.46261969208717346, "learning_rate": 7.361801957646649e-05, "loss": 2.8812, "step": 4590 }, { "epoch": 0.3764898795798104, "grad_norm": 0.4491453468799591, "learning_rate": 7.359411848675113e-05, "loss": 2.8773, "step": 4592 }, { "epoch": 0.37665385600819884, "grad_norm": 0.4237003028392792, "learning_rate": 7.357021045924068e-05, "loss": 2.8133, "step": 4594 }, { "epoch": 0.37681783243658723, "grad_norm": 0.4374450445175171, "learning_rate": 7.354629550096525e-05, "loss": 2.9094, "step": 4596 }, { "epoch": 0.3769818088649757, "grad_norm": 0.43622317910194397, "learning_rate": 7.352237361895699e-05, "loss": 2.8667, "step": 4598 }, { "epoch": 0.37714578529336407, "grad_norm": 0.5011947751045227, "learning_rate": 7.349844482025003e-05, "loss": 2.938, "step": 4600 }, { "epoch": 0.3773097617217525, "grad_norm": 0.44445064663887024, "learning_rate": 7.347450911188063e-05, "loss": 2.9117, "step": 4602 }, { "epoch": 0.3774737381501409, "grad_norm": 0.462815523147583, "learning_rate": 7.345056650088698e-05, "loss": 2.8672, "step": 4604 }, { "epoch": 0.37763771457852935, "grad_norm": 0.4615892767906189, "learning_rate": 7.342661699430939e-05, "loss": 2.8671, "step": 4606 }, { "epoch": 0.37780169100691774, "grad_norm": 0.44637149572372437, "learning_rate": 7.340266059919014e-05, "loss": 2.895, "step": 4608 }, { "epoch": 0.3779656674353062, "grad_norm": 0.45245638489723206, "learning_rate": 7.337869732257352e-05, "loss": 2.9269, "step": 4610 }, { "epoch": 0.3781296438636946, "grad_norm": 0.5181540846824646, "learning_rate": 7.335472717150593e-05, "loss": 2.9433, "step": 4612 }, { "epoch": 0.378293620292083, "grad_norm": 0.46372494101524353, "learning_rate": 7.33307501530357e-05, "loss": 2.881, "step": 4614 }, { "epoch": 0.3784575967204714, "grad_norm": 0.4642076790332794, "learning_rate": 7.330676627421322e-05, "loss": 2.8851, "step": 4616 }, { "epoch": 0.37862157314885986, "grad_norm": 0.3950099050998688, "learning_rate": 7.328277554209094e-05, "loss": 2.804, "step": 4618 }, { "epoch": 0.37878554957724825, "grad_norm": 0.4654783010482788, "learning_rate": 7.32587779637232e-05, "loss": 2.8696, "step": 4620 }, { "epoch": 0.3789495260056367, "grad_norm": 0.4221939742565155, "learning_rate": 7.323477354616648e-05, "loss": 2.8465, "step": 4622 }, { "epoch": 0.3791135024340251, "grad_norm": 0.42203181982040405, "learning_rate": 7.321076229647921e-05, "loss": 2.8943, "step": 4624 }, { "epoch": 0.37927747886241353, "grad_norm": 0.43732768297195435, "learning_rate": 7.318674422172185e-05, "loss": 2.9149, "step": 4626 }, { "epoch": 0.3794414552908019, "grad_norm": 0.47572824358940125, "learning_rate": 7.316271932895685e-05, "loss": 2.9373, "step": 4628 }, { "epoch": 0.37960543171919037, "grad_norm": 0.4992130994796753, "learning_rate": 7.313868762524867e-05, "loss": 2.8813, "step": 4630 }, { "epoch": 0.3797694081475788, "grad_norm": 0.42080262303352356, "learning_rate": 7.311464911766379e-05, "loss": 2.8948, "step": 4632 }, { "epoch": 0.3799333845759672, "grad_norm": 0.47596582770347595, "learning_rate": 7.309060381327066e-05, "loss": 2.9189, "step": 4634 }, { "epoch": 0.38009736100435565, "grad_norm": 0.46686580777168274, "learning_rate": 7.306655171913976e-05, "loss": 2.8552, "step": 4636 }, { "epoch": 0.38026133743274404, "grad_norm": 0.5205831527709961, "learning_rate": 7.304249284234354e-05, "loss": 2.8598, "step": 4638 }, { "epoch": 0.3804253138611325, "grad_norm": 0.47803831100463867, "learning_rate": 7.301842718995646e-05, "loss": 2.8732, "step": 4640 }, { "epoch": 0.3805892902895209, "grad_norm": 0.5350506901741028, "learning_rate": 7.299435476905498e-05, "loss": 2.9056, "step": 4642 }, { "epoch": 0.3807532667179093, "grad_norm": 0.5091723203659058, "learning_rate": 7.297027558671752e-05, "loss": 2.8303, "step": 4644 }, { "epoch": 0.3809172431462977, "grad_norm": 0.4987587332725525, "learning_rate": 7.294618965002451e-05, "loss": 2.8797, "step": 4646 }, { "epoch": 0.38108121957468616, "grad_norm": 0.4535655677318573, "learning_rate": 7.29220969660584e-05, "loss": 2.8964, "step": 4648 }, { "epoch": 0.38124519600307455, "grad_norm": 0.501015841960907, "learning_rate": 7.289799754190354e-05, "loss": 2.8858, "step": 4650 }, { "epoch": 0.381409172431463, "grad_norm": 0.46643322706222534, "learning_rate": 7.28738913846463e-05, "loss": 2.9152, "step": 4652 }, { "epoch": 0.3815731488598514, "grad_norm": 0.5290485620498657, "learning_rate": 7.284977850137509e-05, "loss": 2.911, "step": 4654 }, { "epoch": 0.38173712528823983, "grad_norm": 0.44401952624320984, "learning_rate": 7.282565889918022e-05, "loss": 2.8881, "step": 4656 }, { "epoch": 0.3819011017166282, "grad_norm": 0.4408971071243286, "learning_rate": 7.280153258515398e-05, "loss": 2.8352, "step": 4658 }, { "epoch": 0.38206507814501667, "grad_norm": 0.46017321944236755, "learning_rate": 7.277739956639071e-05, "loss": 2.848, "step": 4660 }, { "epoch": 0.38222905457340506, "grad_norm": 0.46184903383255005, "learning_rate": 7.275325984998662e-05, "loss": 2.8877, "step": 4662 }, { "epoch": 0.3823930310017935, "grad_norm": 0.4524565041065216, "learning_rate": 7.272911344303994e-05, "loss": 2.8983, "step": 4664 }, { "epoch": 0.3825570074301819, "grad_norm": 0.4392745792865753, "learning_rate": 7.27049603526509e-05, "loss": 2.819, "step": 4666 }, { "epoch": 0.38272098385857034, "grad_norm": 0.4415312111377716, "learning_rate": 7.268080058592163e-05, "loss": 2.894, "step": 4668 }, { "epoch": 0.38288496028695873, "grad_norm": 0.4408855140209198, "learning_rate": 7.265663414995626e-05, "loss": 2.8457, "step": 4670 }, { "epoch": 0.3830489367153472, "grad_norm": 0.539198100566864, "learning_rate": 7.263246105186088e-05, "loss": 2.9059, "step": 4672 }, { "epoch": 0.38321291314373557, "grad_norm": 0.4784288704395294, "learning_rate": 7.26082812987435e-05, "loss": 2.83, "step": 4674 }, { "epoch": 0.383376889572124, "grad_norm": 0.49614304304122925, "learning_rate": 7.258409489771417e-05, "loss": 2.8974, "step": 4676 }, { "epoch": 0.3835408660005124, "grad_norm": 0.4700537323951721, "learning_rate": 7.255990185588482e-05, "loss": 2.8507, "step": 4678 }, { "epoch": 0.38370484242890085, "grad_norm": 0.4723677337169647, "learning_rate": 7.253570218036935e-05, "loss": 2.8582, "step": 4680 }, { "epoch": 0.38386881885728924, "grad_norm": 0.4178030490875244, "learning_rate": 7.251149587828362e-05, "loss": 2.9057, "step": 4682 }, { "epoch": 0.3840327952856777, "grad_norm": 0.4648958146572113, "learning_rate": 7.248728295674545e-05, "loss": 2.8949, "step": 4684 }, { "epoch": 0.3841967717140661, "grad_norm": 0.442658394575119, "learning_rate": 7.246306342287456e-05, "loss": 2.8352, "step": 4686 }, { "epoch": 0.3843607481424545, "grad_norm": 0.48625364899635315, "learning_rate": 7.24388372837927e-05, "loss": 2.8022, "step": 4688 }, { "epoch": 0.38452472457084297, "grad_norm": 0.4710596203804016, "learning_rate": 7.241460454662347e-05, "loss": 2.9089, "step": 4690 }, { "epoch": 0.38468870099923136, "grad_norm": 0.4838905334472656, "learning_rate": 7.239036521849246e-05, "loss": 2.882, "step": 4692 }, { "epoch": 0.3848526774276198, "grad_norm": 0.4697989821434021, "learning_rate": 7.236611930652719e-05, "loss": 2.8626, "step": 4694 }, { "epoch": 0.3850166538560082, "grad_norm": 0.4877309203147888, "learning_rate": 7.23418668178571e-05, "loss": 2.889, "step": 4696 }, { "epoch": 0.38518063028439664, "grad_norm": 0.46774300932884216, "learning_rate": 7.231760775961358e-05, "loss": 2.8607, "step": 4698 }, { "epoch": 0.38534460671278503, "grad_norm": 0.45244014263153076, "learning_rate": 7.229334213892996e-05, "loss": 2.9335, "step": 4700 }, { "epoch": 0.3855085831411735, "grad_norm": 0.5148520469665527, "learning_rate": 7.226906996294151e-05, "loss": 2.8708, "step": 4702 }, { "epoch": 0.38567255956956187, "grad_norm": 0.4698105752468109, "learning_rate": 7.224479123878536e-05, "loss": 2.8489, "step": 4704 }, { "epoch": 0.3858365359979503, "grad_norm": 0.4400726556777954, "learning_rate": 7.222050597360063e-05, "loss": 2.8571, "step": 4706 }, { "epoch": 0.3860005124263387, "grad_norm": 0.532037615776062, "learning_rate": 7.219621417452836e-05, "loss": 2.933, "step": 4708 }, { "epoch": 0.38616448885472715, "grad_norm": 0.5109682679176331, "learning_rate": 7.217191584871147e-05, "loss": 2.9106, "step": 4710 }, { "epoch": 0.38632846528311554, "grad_norm": 0.46120208501815796, "learning_rate": 7.214761100329485e-05, "loss": 2.827, "step": 4712 }, { "epoch": 0.386492441711504, "grad_norm": 0.4793972671031952, "learning_rate": 7.212329964542528e-05, "loss": 2.8791, "step": 4714 }, { "epoch": 0.3866564181398924, "grad_norm": 0.48017799854278564, "learning_rate": 7.209898178225144e-05, "loss": 2.9621, "step": 4716 }, { "epoch": 0.3868203945682808, "grad_norm": 0.484854131937027, "learning_rate": 7.207465742092395e-05, "loss": 2.8686, "step": 4718 }, { "epoch": 0.3869843709966692, "grad_norm": 0.46027281880378723, "learning_rate": 7.205032656859534e-05, "loss": 2.9059, "step": 4720 }, { "epoch": 0.38714834742505766, "grad_norm": 0.46675822138786316, "learning_rate": 7.202598923242003e-05, "loss": 2.8374, "step": 4722 }, { "epoch": 0.38731232385344605, "grad_norm": 0.5142098069190979, "learning_rate": 7.200164541955435e-05, "loss": 2.8344, "step": 4724 }, { "epoch": 0.3874763002818345, "grad_norm": 0.474359929561615, "learning_rate": 7.197729513715657e-05, "loss": 2.8859, "step": 4726 }, { "epoch": 0.3876402767102229, "grad_norm": 0.4211660623550415, "learning_rate": 7.195293839238679e-05, "loss": 2.8561, "step": 4728 }, { "epoch": 0.38780425313861133, "grad_norm": 0.4446225166320801, "learning_rate": 7.192857519240707e-05, "loss": 2.8293, "step": 4730 }, { "epoch": 0.3879682295669997, "grad_norm": 0.46592506766319275, "learning_rate": 7.19042055443814e-05, "loss": 2.9295, "step": 4732 }, { "epoch": 0.38813220599538817, "grad_norm": 0.4611138701438904, "learning_rate": 7.187982945547553e-05, "loss": 2.8819, "step": 4734 }, { "epoch": 0.38829618242377656, "grad_norm": 0.4245645999908447, "learning_rate": 7.185544693285727e-05, "loss": 2.8137, "step": 4736 }, { "epoch": 0.388460158852165, "grad_norm": 0.41161197423934937, "learning_rate": 7.18310579836962e-05, "loss": 2.8618, "step": 4738 }, { "epoch": 0.3886241352805534, "grad_norm": 0.49383336305618286, "learning_rate": 7.180666261516384e-05, "loss": 2.8781, "step": 4740 }, { "epoch": 0.38878811170894184, "grad_norm": 0.48720452189445496, "learning_rate": 7.17822608344336e-05, "loss": 2.8798, "step": 4742 }, { "epoch": 0.38895208813733023, "grad_norm": 0.45469310879707336, "learning_rate": 7.175785264868077e-05, "loss": 2.9009, "step": 4744 }, { "epoch": 0.3891160645657187, "grad_norm": 0.4168962240219116, "learning_rate": 7.17334380650825e-05, "loss": 2.8802, "step": 4746 }, { "epoch": 0.3892800409941071, "grad_norm": 0.43333402276039124, "learning_rate": 7.170901709081784e-05, "loss": 2.8137, "step": 4748 }, { "epoch": 0.3894440174224955, "grad_norm": 0.4718402028083801, "learning_rate": 7.168458973306776e-05, "loss": 2.8091, "step": 4750 }, { "epoch": 0.38960799385088396, "grad_norm": 0.45464134216308594, "learning_rate": 7.1660155999015e-05, "loss": 2.8456, "step": 4752 }, { "epoch": 0.38977197027927235, "grad_norm": 0.4399542212486267, "learning_rate": 7.16357158958443e-05, "loss": 2.9038, "step": 4754 }, { "epoch": 0.3899359467076608, "grad_norm": 0.4799881875514984, "learning_rate": 7.16112694307422e-05, "loss": 2.8751, "step": 4756 }, { "epoch": 0.3900999231360492, "grad_norm": 0.5272742509841919, "learning_rate": 7.158681661089714e-05, "loss": 2.8811, "step": 4758 }, { "epoch": 0.39026389956443763, "grad_norm": 0.46250781416893005, "learning_rate": 7.156235744349938e-05, "loss": 2.8629, "step": 4760 }, { "epoch": 0.390427875992826, "grad_norm": 0.46309372782707214, "learning_rate": 7.15378919357411e-05, "loss": 2.8822, "step": 4762 }, { "epoch": 0.39059185242121447, "grad_norm": 0.45474520325660706, "learning_rate": 7.15134200948163e-05, "loss": 2.8844, "step": 4764 }, { "epoch": 0.39075582884960286, "grad_norm": 0.4643316864967346, "learning_rate": 7.14889419279209e-05, "loss": 2.9392, "step": 4766 }, { "epoch": 0.3909198052779913, "grad_norm": 0.44271019101142883, "learning_rate": 7.146445744225265e-05, "loss": 2.8732, "step": 4768 }, { "epoch": 0.3910837817063797, "grad_norm": 0.4619930684566498, "learning_rate": 7.143996664501114e-05, "loss": 2.8882, "step": 4770 }, { "epoch": 0.39124775813476814, "grad_norm": 0.45898932218551636, "learning_rate": 7.141546954339782e-05, "loss": 2.8998, "step": 4772 }, { "epoch": 0.39141173456315653, "grad_norm": 0.49852630496025085, "learning_rate": 7.139096614461602e-05, "loss": 2.8935, "step": 4774 }, { "epoch": 0.391575710991545, "grad_norm": 0.4800775945186615, "learning_rate": 7.136645645587091e-05, "loss": 2.8701, "step": 4776 }, { "epoch": 0.39173968741993337, "grad_norm": 0.4955558478832245, "learning_rate": 7.13419404843695e-05, "loss": 2.8869, "step": 4778 }, { "epoch": 0.3919036638483218, "grad_norm": 0.4397711455821991, "learning_rate": 7.131741823732065e-05, "loss": 2.9377, "step": 4780 }, { "epoch": 0.3920676402767102, "grad_norm": 0.45045700669288635, "learning_rate": 7.129288972193509e-05, "loss": 2.8382, "step": 4782 }, { "epoch": 0.39223161670509865, "grad_norm": 0.4570524990558624, "learning_rate": 7.126835494542534e-05, "loss": 2.9249, "step": 4784 }, { "epoch": 0.39239559313348704, "grad_norm": 0.5094670653343201, "learning_rate": 7.124381391500584e-05, "loss": 2.8795, "step": 4786 }, { "epoch": 0.3925595695618755, "grad_norm": 0.46974891424179077, "learning_rate": 7.121926663789275e-05, "loss": 2.8413, "step": 4788 }, { "epoch": 0.3927235459902639, "grad_norm": 0.418794184923172, "learning_rate": 7.11947131213042e-05, "loss": 2.8607, "step": 4790 }, { "epoch": 0.3928875224186523, "grad_norm": 0.4206392168998718, "learning_rate": 7.117015337246008e-05, "loss": 2.8609, "step": 4792 }, { "epoch": 0.3930514988470407, "grad_norm": 0.4286406934261322, "learning_rate": 7.114558739858211e-05, "loss": 2.8891, "step": 4794 }, { "epoch": 0.39321547527542916, "grad_norm": 0.4457423686981201, "learning_rate": 7.112101520689388e-05, "loss": 2.9103, "step": 4796 }, { "epoch": 0.39337945170381755, "grad_norm": 0.4439098834991455, "learning_rate": 7.109643680462077e-05, "loss": 2.8102, "step": 4798 }, { "epoch": 0.393543428132206, "grad_norm": 0.47788435220718384, "learning_rate": 7.107185219899e-05, "loss": 2.8454, "step": 4800 }, { "epoch": 0.3937074045605944, "grad_norm": 0.4632824957370758, "learning_rate": 7.104726139723063e-05, "loss": 2.8755, "step": 4802 }, { "epoch": 0.39387138098898283, "grad_norm": 0.4338068664073944, "learning_rate": 7.102266440657351e-05, "loss": 2.797, "step": 4804 }, { "epoch": 0.3940353574173713, "grad_norm": 0.43949875235557556, "learning_rate": 7.099806123425134e-05, "loss": 2.8803, "step": 4806 }, { "epoch": 0.39419933384575967, "grad_norm": 0.4427996575832367, "learning_rate": 7.097345188749864e-05, "loss": 2.8919, "step": 4808 }, { "epoch": 0.3943633102741481, "grad_norm": 0.42919567227363586, "learning_rate": 7.094883637355169e-05, "loss": 2.8092, "step": 4810 }, { "epoch": 0.3945272867025365, "grad_norm": 0.4398113489151001, "learning_rate": 7.092421469964864e-05, "loss": 2.8571, "step": 4812 }, { "epoch": 0.39469126313092495, "grad_norm": 0.41409242153167725, "learning_rate": 7.089958687302945e-05, "loss": 2.841, "step": 4814 }, { "epoch": 0.39485523955931334, "grad_norm": 0.413713276386261, "learning_rate": 7.087495290093585e-05, "loss": 2.8935, "step": 4816 }, { "epoch": 0.3950192159877018, "grad_norm": 0.45033878087997437, "learning_rate": 7.085031279061143e-05, "loss": 2.8811, "step": 4818 }, { "epoch": 0.3951831924160902, "grad_norm": 0.4509275257587433, "learning_rate": 7.082566654930154e-05, "loss": 2.8866, "step": 4820 }, { "epoch": 0.3953471688444786, "grad_norm": 0.46417686343193054, "learning_rate": 7.080101418425336e-05, "loss": 2.889, "step": 4822 }, { "epoch": 0.395511145272867, "grad_norm": 0.440935879945755, "learning_rate": 7.077635570271583e-05, "loss": 2.8239, "step": 4824 }, { "epoch": 0.39567512170125546, "grad_norm": 0.47923141717910767, "learning_rate": 7.075169111193976e-05, "loss": 2.8989, "step": 4826 }, { "epoch": 0.39583909812964385, "grad_norm": 0.45613157749176025, "learning_rate": 7.072702041917769e-05, "loss": 2.8212, "step": 4828 }, { "epoch": 0.3960030745580323, "grad_norm": 0.4167928695678711, "learning_rate": 7.0702343631684e-05, "loss": 2.853, "step": 4830 }, { "epoch": 0.3961670509864207, "grad_norm": 0.4974370300769806, "learning_rate": 7.06776607567148e-05, "loss": 2.8654, "step": 4832 }, { "epoch": 0.39633102741480913, "grad_norm": 0.5273467898368835, "learning_rate": 7.065297180152808e-05, "loss": 2.8749, "step": 4834 }, { "epoch": 0.3964950038431975, "grad_norm": 0.5259729623794556, "learning_rate": 7.062827677338354e-05, "loss": 2.8803, "step": 4836 }, { "epoch": 0.39665898027158597, "grad_norm": 0.6957345008850098, "learning_rate": 7.060357567954269e-05, "loss": 2.8754, "step": 4838 }, { "epoch": 0.39682295669997436, "grad_norm": 0.5111395716667175, "learning_rate": 7.057886852726886e-05, "loss": 2.8317, "step": 4840 }, { "epoch": 0.3969869331283628, "grad_norm": 0.5050389766693115, "learning_rate": 7.05541553238271e-05, "loss": 2.8951, "step": 4842 }, { "epoch": 0.3971509095567512, "grad_norm": 0.4891645014286041, "learning_rate": 7.052943607648428e-05, "loss": 2.8765, "step": 4844 }, { "epoch": 0.39731488598513964, "grad_norm": 0.5061144232749939, "learning_rate": 7.050471079250904e-05, "loss": 2.8982, "step": 4846 }, { "epoch": 0.39747886241352803, "grad_norm": 0.4700469374656677, "learning_rate": 7.047997947917177e-05, "loss": 2.9003, "step": 4848 }, { "epoch": 0.3976428388419165, "grad_norm": 0.5077940225601196, "learning_rate": 7.045524214374469e-05, "loss": 2.881, "step": 4850 }, { "epoch": 0.39780681527030487, "grad_norm": 0.48411741852760315, "learning_rate": 7.043049879350174e-05, "loss": 2.8381, "step": 4852 }, { "epoch": 0.3979707916986933, "grad_norm": 0.4596063196659088, "learning_rate": 7.040574943571864e-05, "loss": 2.8967, "step": 4854 }, { "epoch": 0.3981347681270817, "grad_norm": 0.45511674880981445, "learning_rate": 7.03809940776729e-05, "loss": 2.8329, "step": 4856 }, { "epoch": 0.39829874455547015, "grad_norm": 0.4465174376964569, "learning_rate": 7.035623272664373e-05, "loss": 2.8677, "step": 4858 }, { "epoch": 0.39846272098385854, "grad_norm": 0.4612562954425812, "learning_rate": 7.033146538991218e-05, "loss": 2.864, "step": 4860 }, { "epoch": 0.398626697412247, "grad_norm": 0.44819965958595276, "learning_rate": 7.030669207476103e-05, "loss": 2.8707, "step": 4862 }, { "epoch": 0.39879067384063543, "grad_norm": 0.48881852626800537, "learning_rate": 7.02819127884748e-05, "loss": 2.8788, "step": 4864 }, { "epoch": 0.3989546502690238, "grad_norm": 0.4424434304237366, "learning_rate": 7.025712753833978e-05, "loss": 2.9113, "step": 4866 }, { "epoch": 0.39911862669741227, "grad_norm": 0.49569734930992126, "learning_rate": 7.023233633164403e-05, "loss": 2.8788, "step": 4868 }, { "epoch": 0.39928260312580066, "grad_norm": 0.49903231859207153, "learning_rate": 7.020753917567735e-05, "loss": 2.8644, "step": 4870 }, { "epoch": 0.3994465795541891, "grad_norm": 0.4598727226257324, "learning_rate": 7.018273607773126e-05, "loss": 2.8747, "step": 4872 }, { "epoch": 0.3996105559825775, "grad_norm": 0.4078742563724518, "learning_rate": 7.015792704509906e-05, "loss": 2.861, "step": 4874 }, { "epoch": 0.39977453241096594, "grad_norm": 0.44463399052619934, "learning_rate": 7.013311208507581e-05, "loss": 2.8387, "step": 4876 }, { "epoch": 0.39993850883935433, "grad_norm": 0.4435771703720093, "learning_rate": 7.010829120495827e-05, "loss": 2.8086, "step": 4878 }, { "epoch": 0.4001024852677428, "grad_norm": 0.45103517174720764, "learning_rate": 7.008346441204497e-05, "loss": 2.9025, "step": 4880 }, { "epoch": 0.40026646169613117, "grad_norm": 0.45612624287605286, "learning_rate": 7.005863171363615e-05, "loss": 2.8465, "step": 4882 }, { "epoch": 0.4004304381245196, "grad_norm": 0.4706539809703827, "learning_rate": 7.003379311703384e-05, "loss": 2.8552, "step": 4884 }, { "epoch": 0.400594414552908, "grad_norm": 0.42342978715896606, "learning_rate": 7.000894862954175e-05, "loss": 2.8573, "step": 4886 }, { "epoch": 0.40075839098129645, "grad_norm": 0.41396844387054443, "learning_rate": 6.998409825846534e-05, "loss": 2.7617, "step": 4888 }, { "epoch": 0.40092236740968484, "grad_norm": 0.42022252082824707, "learning_rate": 6.995924201111182e-05, "loss": 2.8342, "step": 4890 }, { "epoch": 0.4010863438380733, "grad_norm": 0.42854151129722595, "learning_rate": 6.993437989479008e-05, "loss": 2.8815, "step": 4892 }, { "epoch": 0.4012503202664617, "grad_norm": 0.4279215633869171, "learning_rate": 6.99095119168108e-05, "loss": 2.8604, "step": 4894 }, { "epoch": 0.4014142966948501, "grad_norm": 0.42381319403648376, "learning_rate": 6.988463808448635e-05, "loss": 2.8371, "step": 4896 }, { "epoch": 0.4015782731232385, "grad_norm": 0.4339781403541565, "learning_rate": 6.985975840513082e-05, "loss": 2.8265, "step": 4898 }, { "epoch": 0.40174224955162696, "grad_norm": 0.44247475266456604, "learning_rate": 6.983487288605998e-05, "loss": 2.87, "step": 4900 }, { "epoch": 0.40190622598001535, "grad_norm": 0.46390461921691895, "learning_rate": 6.98099815345914e-05, "loss": 2.8553, "step": 4902 }, { "epoch": 0.4020702024084038, "grad_norm": 0.45721256732940674, "learning_rate": 6.978508435804432e-05, "loss": 2.8543, "step": 4904 }, { "epoch": 0.4022341788367922, "grad_norm": 0.4119372069835663, "learning_rate": 6.976018136373968e-05, "loss": 2.8354, "step": 4906 }, { "epoch": 0.40239815526518063, "grad_norm": 0.4009445905685425, "learning_rate": 6.973527255900017e-05, "loss": 2.9314, "step": 4908 }, { "epoch": 0.402562131693569, "grad_norm": 0.40952831506729126, "learning_rate": 6.971035795115015e-05, "loss": 2.8343, "step": 4910 }, { "epoch": 0.40272610812195747, "grad_norm": 0.4430871307849884, "learning_rate": 6.96854375475157e-05, "loss": 2.8187, "step": 4912 }, { "epoch": 0.40289008455034586, "grad_norm": 0.48836904764175415, "learning_rate": 6.966051135542462e-05, "loss": 2.8811, "step": 4914 }, { "epoch": 0.4030540609787343, "grad_norm": 0.48596030473709106, "learning_rate": 6.963557938220638e-05, "loss": 2.8653, "step": 4916 }, { "epoch": 0.40321803740712275, "grad_norm": 0.5084185004234314, "learning_rate": 6.961064163519217e-05, "loss": 2.8678, "step": 4918 }, { "epoch": 0.40338201383551114, "grad_norm": 0.48467105627059937, "learning_rate": 6.958569812171488e-05, "loss": 2.8321, "step": 4920 }, { "epoch": 0.4035459902638996, "grad_norm": 0.455792099237442, "learning_rate": 6.95607488491091e-05, "loss": 2.8788, "step": 4922 }, { "epoch": 0.403709966692288, "grad_norm": 0.4418245851993561, "learning_rate": 6.95357938247111e-05, "loss": 2.8572, "step": 4924 }, { "epoch": 0.4038739431206764, "grad_norm": 0.44264933466911316, "learning_rate": 6.951083305585886e-05, "loss": 2.8806, "step": 4926 }, { "epoch": 0.4040379195490648, "grad_norm": 0.5314618349075317, "learning_rate": 6.9485866549892e-05, "loss": 2.898, "step": 4928 }, { "epoch": 0.40420189597745326, "grad_norm": 0.5305108428001404, "learning_rate": 6.946089431415188e-05, "loss": 2.8387, "step": 4930 }, { "epoch": 0.40436587240584165, "grad_norm": 0.47681617736816406, "learning_rate": 6.943591635598155e-05, "loss": 2.861, "step": 4932 }, { "epoch": 0.4045298488342301, "grad_norm": 0.4332691431045532, "learning_rate": 6.941093268272568e-05, "loss": 2.8346, "step": 4934 }, { "epoch": 0.4046938252626185, "grad_norm": 0.4275270998477936, "learning_rate": 6.93859433017307e-05, "loss": 2.8146, "step": 4936 }, { "epoch": 0.40485780169100694, "grad_norm": 0.443386048078537, "learning_rate": 6.936094822034465e-05, "loss": 2.8273, "step": 4938 }, { "epoch": 0.4050217781193953, "grad_norm": 0.406711220741272, "learning_rate": 6.933594744591727e-05, "loss": 2.8644, "step": 4940 }, { "epoch": 0.40518575454778377, "grad_norm": 0.4228709042072296, "learning_rate": 6.931094098580002e-05, "loss": 2.7865, "step": 4942 }, { "epoch": 0.40534973097617216, "grad_norm": 0.4386141002178192, "learning_rate": 6.928592884734595e-05, "loss": 2.8248, "step": 4944 }, { "epoch": 0.4055137074045606, "grad_norm": 0.449709415435791, "learning_rate": 6.926091103790984e-05, "loss": 2.8646, "step": 4946 }, { "epoch": 0.405677683832949, "grad_norm": 0.43006017804145813, "learning_rate": 6.923588756484808e-05, "loss": 2.8325, "step": 4948 }, { "epoch": 0.40584166026133744, "grad_norm": 0.43961092829704285, "learning_rate": 6.921085843551885e-05, "loss": 2.8412, "step": 4950 }, { "epoch": 0.40600563668972584, "grad_norm": 0.4436211884021759, "learning_rate": 6.91858236572818e-05, "loss": 2.896, "step": 4952 }, { "epoch": 0.4061696131181143, "grad_norm": 0.46775439381599426, "learning_rate": 6.916078323749844e-05, "loss": 2.8587, "step": 4954 }, { "epoch": 0.40633358954650267, "grad_norm": 0.43751874566078186, "learning_rate": 6.91357371835318e-05, "loss": 2.8989, "step": 4956 }, { "epoch": 0.4064975659748911, "grad_norm": 0.4038192331790924, "learning_rate": 6.91106855027466e-05, "loss": 2.829, "step": 4958 }, { "epoch": 0.4066615424032795, "grad_norm": 0.4433155655860901, "learning_rate": 6.908562820250925e-05, "loss": 2.845, "step": 4960 }, { "epoch": 0.40682551883166795, "grad_norm": 0.3981752097606659, "learning_rate": 6.906056529018782e-05, "loss": 2.834, "step": 4962 }, { "epoch": 0.40698949526005634, "grad_norm": 0.4669404625892639, "learning_rate": 6.903549677315194e-05, "loss": 2.8514, "step": 4964 }, { "epoch": 0.4071534716884448, "grad_norm": 0.42662471532821655, "learning_rate": 6.901042265877299e-05, "loss": 2.8629, "step": 4966 }, { "epoch": 0.4073174481168332, "grad_norm": 0.45783746242523193, "learning_rate": 6.898534295442394e-05, "loss": 2.8507, "step": 4968 }, { "epoch": 0.4074814245452216, "grad_norm": 0.4851702153682709, "learning_rate": 6.896025766747941e-05, "loss": 2.8843, "step": 4970 }, { "epoch": 0.40764540097361, "grad_norm": 0.4829177260398865, "learning_rate": 6.893516680531568e-05, "loss": 2.8818, "step": 4972 }, { "epoch": 0.40780937740199846, "grad_norm": 0.4882843494415283, "learning_rate": 6.891007037531067e-05, "loss": 2.8301, "step": 4974 }, { "epoch": 0.4079733538303869, "grad_norm": 0.4679323732852936, "learning_rate": 6.888496838484391e-05, "loss": 2.8365, "step": 4976 }, { "epoch": 0.4081373302587753, "grad_norm": 0.4670538902282715, "learning_rate": 6.885986084129657e-05, "loss": 2.7802, "step": 4978 }, { "epoch": 0.40830130668716375, "grad_norm": 0.4513445198535919, "learning_rate": 6.88347477520515e-05, "loss": 2.8893, "step": 4980 }, { "epoch": 0.40846528311555214, "grad_norm": 0.4539678394794464, "learning_rate": 6.880962912449313e-05, "loss": 2.8921, "step": 4982 }, { "epoch": 0.4086292595439406, "grad_norm": 0.46022936701774597, "learning_rate": 6.87845049660075e-05, "loss": 2.8595, "step": 4984 }, { "epoch": 0.40879323597232897, "grad_norm": 0.4774166941642761, "learning_rate": 6.875937528398237e-05, "loss": 2.8123, "step": 4986 }, { "epoch": 0.4089572124007174, "grad_norm": 0.45405611395835876, "learning_rate": 6.873424008580701e-05, "loss": 2.8075, "step": 4988 }, { "epoch": 0.4091211888291058, "grad_norm": 0.45312246680259705, "learning_rate": 6.87090993788724e-05, "loss": 2.8432, "step": 4990 }, { "epoch": 0.40928516525749425, "grad_norm": 0.4497641324996948, "learning_rate": 6.86839531705711e-05, "loss": 2.8694, "step": 4992 }, { "epoch": 0.40944914168588264, "grad_norm": 0.49629077315330505, "learning_rate": 6.865880146829727e-05, "loss": 2.8581, "step": 4994 }, { "epoch": 0.4096131181142711, "grad_norm": 0.4826986491680145, "learning_rate": 6.863364427944673e-05, "loss": 2.8362, "step": 4996 }, { "epoch": 0.4097770945426595, "grad_norm": 0.4984830617904663, "learning_rate": 6.86084816114169e-05, "loss": 2.8219, "step": 4998 }, { "epoch": 0.4099410709710479, "grad_norm": 0.48046591877937317, "learning_rate": 6.858331347160678e-05, "loss": 2.8906, "step": 5000 }, { "epoch": 0.4101050473994363, "grad_norm": 0.43370234966278076, "learning_rate": 6.855813986741701e-05, "loss": 2.7766, "step": 5002 }, { "epoch": 0.41026902382782476, "grad_norm": 0.4225366413593292, "learning_rate": 6.853296080624984e-05, "loss": 2.8594, "step": 5004 }, { "epoch": 0.41043300025621315, "grad_norm": 0.4410724639892578, "learning_rate": 6.85077762955091e-05, "loss": 2.7648, "step": 5006 }, { "epoch": 0.4105969766846016, "grad_norm": 0.47217485308647156, "learning_rate": 6.848258634260026e-05, "loss": 2.8507, "step": 5008 }, { "epoch": 0.41076095311299, "grad_norm": 0.45836207270622253, "learning_rate": 6.845739095493033e-05, "loss": 2.8208, "step": 5010 }, { "epoch": 0.41092492954137844, "grad_norm": 0.44076651334762573, "learning_rate": 6.843219013990801e-05, "loss": 2.8757, "step": 5012 }, { "epoch": 0.4110889059697668, "grad_norm": 0.4325857162475586, "learning_rate": 6.840698390494346e-05, "loss": 2.8759, "step": 5014 }, { "epoch": 0.4112528823981553, "grad_norm": 0.43539878726005554, "learning_rate": 6.838177225744859e-05, "loss": 2.8535, "step": 5016 }, { "epoch": 0.41141685882654366, "grad_norm": 0.4194115102291107, "learning_rate": 6.835655520483677e-05, "loss": 2.8644, "step": 5018 }, { "epoch": 0.4115808352549321, "grad_norm": 0.4883398711681366, "learning_rate": 6.833133275452305e-05, "loss": 2.843, "step": 5020 }, { "epoch": 0.4117448116833205, "grad_norm": 0.462043821811676, "learning_rate": 6.830610491392403e-05, "loss": 2.8797, "step": 5022 }, { "epoch": 0.41190878811170895, "grad_norm": 0.4517926871776581, "learning_rate": 6.828087169045788e-05, "loss": 2.8508, "step": 5024 }, { "epoch": 0.41207276454009734, "grad_norm": 0.4353565573692322, "learning_rate": 6.825563309154437e-05, "loss": 2.8581, "step": 5026 }, { "epoch": 0.4122367409684858, "grad_norm": 0.4237254858016968, "learning_rate": 6.823038912460488e-05, "loss": 2.8593, "step": 5028 }, { "epoch": 0.4124007173968742, "grad_norm": 0.4407292902469635, "learning_rate": 6.820513979706232e-05, "loss": 2.8378, "step": 5030 }, { "epoch": 0.4125646938252626, "grad_norm": 0.4466221332550049, "learning_rate": 6.817988511634117e-05, "loss": 2.8888, "step": 5032 }, { "epoch": 0.41272867025365106, "grad_norm": 0.454248309135437, "learning_rate": 6.815462508986755e-05, "loss": 2.9008, "step": 5034 }, { "epoch": 0.41289264668203945, "grad_norm": 0.415745347738266, "learning_rate": 6.812935972506909e-05, "loss": 2.8414, "step": 5036 }, { "epoch": 0.4130566231104279, "grad_norm": 0.43440842628479004, "learning_rate": 6.810408902937503e-05, "loss": 2.8519, "step": 5038 }, { "epoch": 0.4132205995388163, "grad_norm": 0.42548617720603943, "learning_rate": 6.807881301021614e-05, "loss": 2.8922, "step": 5040 }, { "epoch": 0.41338457596720474, "grad_norm": 0.4431571066379547, "learning_rate": 6.805353167502476e-05, "loss": 2.8186, "step": 5042 }, { "epoch": 0.4135485523955931, "grad_norm": 0.4349539577960968, "learning_rate": 6.802824503123484e-05, "loss": 2.8589, "step": 5044 }, { "epoch": 0.4137125288239816, "grad_norm": 0.46239006519317627, "learning_rate": 6.800295308628186e-05, "loss": 2.8455, "step": 5046 }, { "epoch": 0.41387650525236996, "grad_norm": 0.41889017820358276, "learning_rate": 6.79776558476028e-05, "loss": 2.8039, "step": 5048 }, { "epoch": 0.4140404816807584, "grad_norm": 0.4747699201107025, "learning_rate": 6.795235332263631e-05, "loss": 2.8786, "step": 5050 }, { "epoch": 0.4142044581091468, "grad_norm": 0.4729340672492981, "learning_rate": 6.792704551882255e-05, "loss": 2.8934, "step": 5052 }, { "epoch": 0.41436843453753525, "grad_norm": 0.4421963691711426, "learning_rate": 6.790173244360318e-05, "loss": 2.8233, "step": 5054 }, { "epoch": 0.41453241096592364, "grad_norm": 0.46226418018341064, "learning_rate": 6.787641410442146e-05, "loss": 2.8198, "step": 5056 }, { "epoch": 0.4146963873943121, "grad_norm": 0.47469133138656616, "learning_rate": 6.785109050872218e-05, "loss": 2.9181, "step": 5058 }, { "epoch": 0.4148603638227005, "grad_norm": 0.4686332643032074, "learning_rate": 6.782576166395171e-05, "loss": 2.8448, "step": 5060 }, { "epoch": 0.4150243402510889, "grad_norm": 0.4484609067440033, "learning_rate": 6.780042757755791e-05, "loss": 2.8942, "step": 5062 }, { "epoch": 0.4151883166794773, "grad_norm": 0.416677325963974, "learning_rate": 6.777508825699024e-05, "loss": 2.8765, "step": 5064 }, { "epoch": 0.41535229310786576, "grad_norm": 0.4481672942638397, "learning_rate": 6.774974370969964e-05, "loss": 2.893, "step": 5066 }, { "epoch": 0.41551626953625415, "grad_norm": 0.4461904466152191, "learning_rate": 6.772439394313861e-05, "loss": 2.799, "step": 5068 }, { "epoch": 0.4156802459646426, "grad_norm": 0.45392054319381714, "learning_rate": 6.769903896476122e-05, "loss": 2.8791, "step": 5070 }, { "epoch": 0.415844222393031, "grad_norm": 0.4950689971446991, "learning_rate": 6.767367878202302e-05, "loss": 2.8379, "step": 5072 }, { "epoch": 0.41600819882141943, "grad_norm": 0.4642818868160248, "learning_rate": 6.764831340238111e-05, "loss": 2.8984, "step": 5074 }, { "epoch": 0.4161721752498078, "grad_norm": 0.44757944345474243, "learning_rate": 6.762294283329413e-05, "loss": 2.8848, "step": 5076 }, { "epoch": 0.41633615167819626, "grad_norm": 0.4181385040283203, "learning_rate": 6.759756708222224e-05, "loss": 2.8353, "step": 5078 }, { "epoch": 0.41650012810658466, "grad_norm": 0.4409900903701782, "learning_rate": 6.757218615662711e-05, "loss": 2.8491, "step": 5080 }, { "epoch": 0.4166641045349731, "grad_norm": 0.48207783699035645, "learning_rate": 6.754680006397193e-05, "loss": 2.854, "step": 5082 }, { "epoch": 0.4168280809633615, "grad_norm": 0.44279900193214417, "learning_rate": 6.752140881172146e-05, "loss": 2.8975, "step": 5084 }, { "epoch": 0.41699205739174994, "grad_norm": 0.4284610450267792, "learning_rate": 6.74960124073419e-05, "loss": 2.9468, "step": 5086 }, { "epoch": 0.41715603382013833, "grad_norm": 0.4538637697696686, "learning_rate": 6.747061085830102e-05, "loss": 2.9049, "step": 5088 }, { "epoch": 0.4173200102485268, "grad_norm": 0.4139115512371063, "learning_rate": 6.744520417206808e-05, "loss": 2.7537, "step": 5090 }, { "epoch": 0.4174839866769152, "grad_norm": 0.41832759976387024, "learning_rate": 6.741979235611384e-05, "loss": 2.831, "step": 5092 }, { "epoch": 0.4176479631053036, "grad_norm": 0.4567616283893585, "learning_rate": 6.739437541791062e-05, "loss": 2.8572, "step": 5094 }, { "epoch": 0.41781193953369206, "grad_norm": 0.45696723461151123, "learning_rate": 6.73689533649322e-05, "loss": 2.8366, "step": 5096 }, { "epoch": 0.41797591596208045, "grad_norm": 0.46061649918556213, "learning_rate": 6.734352620465386e-05, "loss": 2.902, "step": 5098 }, { "epoch": 0.4181398923904689, "grad_norm": 0.445056676864624, "learning_rate": 6.731809394455242e-05, "loss": 2.8579, "step": 5100 }, { "epoch": 0.4183038688188573, "grad_norm": 0.4230596125125885, "learning_rate": 6.729265659210614e-05, "loss": 2.8865, "step": 5102 }, { "epoch": 0.41846784524724573, "grad_norm": 0.4370947480201721, "learning_rate": 6.726721415479485e-05, "loss": 2.8129, "step": 5104 }, { "epoch": 0.4186318216756341, "grad_norm": 0.4428729712963104, "learning_rate": 6.724176664009983e-05, "loss": 2.818, "step": 5106 }, { "epoch": 0.41879579810402257, "grad_norm": 0.45244166254997253, "learning_rate": 6.721631405550385e-05, "loss": 2.8146, "step": 5108 }, { "epoch": 0.41895977453241096, "grad_norm": 0.44132840633392334, "learning_rate": 6.719085640849119e-05, "loss": 2.8328, "step": 5110 }, { "epoch": 0.4191237509607994, "grad_norm": 0.4221113324165344, "learning_rate": 6.716539370654761e-05, "loss": 2.8828, "step": 5112 }, { "epoch": 0.4192877273891878, "grad_norm": 0.4474412202835083, "learning_rate": 6.713992595716035e-05, "loss": 2.7689, "step": 5114 }, { "epoch": 0.41945170381757624, "grad_norm": 0.4391607642173767, "learning_rate": 6.711445316781815e-05, "loss": 2.8365, "step": 5116 }, { "epoch": 0.41961568024596463, "grad_norm": 0.41488832235336304, "learning_rate": 6.708897534601124e-05, "loss": 2.8563, "step": 5118 }, { "epoch": 0.4197796566743531, "grad_norm": 0.4629252851009369, "learning_rate": 6.706349249923129e-05, "loss": 2.8557, "step": 5120 }, { "epoch": 0.41994363310274146, "grad_norm": 0.4906052052974701, "learning_rate": 6.703800463497147e-05, "loss": 2.8408, "step": 5122 }, { "epoch": 0.4201076095311299, "grad_norm": 0.42281031608581543, "learning_rate": 6.701251176072645e-05, "loss": 2.8541, "step": 5124 }, { "epoch": 0.4202715859595183, "grad_norm": 0.4068866968154907, "learning_rate": 6.698701388399232e-05, "loss": 2.8388, "step": 5126 }, { "epoch": 0.42043556238790675, "grad_norm": 0.39713165163993835, "learning_rate": 6.696151101226669e-05, "loss": 2.8556, "step": 5128 }, { "epoch": 0.42059953881629514, "grad_norm": 0.4381765127182007, "learning_rate": 6.693600315304863e-05, "loss": 2.8704, "step": 5130 }, { "epoch": 0.4207635152446836, "grad_norm": 0.46095380187034607, "learning_rate": 6.691049031383864e-05, "loss": 2.8452, "step": 5132 }, { "epoch": 0.420927491673072, "grad_norm": 0.44617173075675964, "learning_rate": 6.68849725021387e-05, "loss": 2.8298, "step": 5134 }, { "epoch": 0.4210914681014604, "grad_norm": 0.407774418592453, "learning_rate": 6.685944972545233e-05, "loss": 2.8568, "step": 5136 }, { "epoch": 0.4212554445298488, "grad_norm": 0.40078461170196533, "learning_rate": 6.683392199128436e-05, "loss": 2.8673, "step": 5138 }, { "epoch": 0.42141942095823726, "grad_norm": 0.4423627555370331, "learning_rate": 6.68083893071412e-05, "loss": 2.7988, "step": 5140 }, { "epoch": 0.42158339738662565, "grad_norm": 0.4524652659893036, "learning_rate": 6.678285168053069e-05, "loss": 2.8361, "step": 5142 }, { "epoch": 0.4217473738150141, "grad_norm": 0.4723981022834778, "learning_rate": 6.675730911896209e-05, "loss": 2.7871, "step": 5144 }, { "epoch": 0.4219113502434025, "grad_norm": 0.4568463861942291, "learning_rate": 6.673176162994612e-05, "loss": 2.8334, "step": 5146 }, { "epoch": 0.42207532667179093, "grad_norm": 0.4477904736995697, "learning_rate": 6.670620922099497e-05, "loss": 2.7981, "step": 5148 }, { "epoch": 0.4222393031001794, "grad_norm": 0.40692979097366333, "learning_rate": 6.668065189962229e-05, "loss": 2.8587, "step": 5150 }, { "epoch": 0.42240327952856777, "grad_norm": 0.4016028046607971, "learning_rate": 6.665508967334311e-05, "loss": 2.9009, "step": 5152 }, { "epoch": 0.4225672559569562, "grad_norm": 0.43338680267333984, "learning_rate": 6.662952254967396e-05, "loss": 2.8658, "step": 5154 }, { "epoch": 0.4227312323853446, "grad_norm": 0.481141597032547, "learning_rate": 6.660395053613278e-05, "loss": 2.8064, "step": 5156 }, { "epoch": 0.42289520881373305, "grad_norm": 0.46163663268089294, "learning_rate": 6.657837364023899e-05, "loss": 2.8979, "step": 5158 }, { "epoch": 0.42305918524212144, "grad_norm": 0.41502735018730164, "learning_rate": 6.65527918695134e-05, "loss": 2.907, "step": 5160 }, { "epoch": 0.4232231616705099, "grad_norm": 0.44523778557777405, "learning_rate": 6.652720523147826e-05, "loss": 2.8417, "step": 5162 }, { "epoch": 0.4233871380988983, "grad_norm": 0.40618014335632324, "learning_rate": 6.650161373365726e-05, "loss": 2.8888, "step": 5164 }, { "epoch": 0.4235511145272867, "grad_norm": 0.4037351608276367, "learning_rate": 6.647601738357555e-05, "loss": 2.8301, "step": 5166 }, { "epoch": 0.4237150909556751, "grad_norm": 0.42846325039863586, "learning_rate": 6.645041618875965e-05, "loss": 2.8354, "step": 5168 }, { "epoch": 0.42387906738406356, "grad_norm": 0.4747539758682251, "learning_rate": 6.642481015673751e-05, "loss": 2.8743, "step": 5170 }, { "epoch": 0.42404304381245195, "grad_norm": 0.44995033740997314, "learning_rate": 6.63991992950386e-05, "loss": 2.842, "step": 5172 }, { "epoch": 0.4242070202408404, "grad_norm": 0.440548300743103, "learning_rate": 6.637358361119366e-05, "loss": 2.8511, "step": 5174 }, { "epoch": 0.4243709966692288, "grad_norm": 0.4418390691280365, "learning_rate": 6.634796311273493e-05, "loss": 2.8331, "step": 5176 }, { "epoch": 0.42453497309761723, "grad_norm": 0.44166362285614014, "learning_rate": 6.63223378071961e-05, "loss": 2.7898, "step": 5178 }, { "epoch": 0.4246989495260056, "grad_norm": 0.45935314893722534, "learning_rate": 6.629670770211218e-05, "loss": 2.8341, "step": 5180 }, { "epoch": 0.42486292595439407, "grad_norm": 0.45904242992401123, "learning_rate": 6.627107280501968e-05, "loss": 2.8524, "step": 5182 }, { "epoch": 0.42502690238278246, "grad_norm": 0.4371541142463684, "learning_rate": 6.624543312345645e-05, "loss": 2.8115, "step": 5184 }, { "epoch": 0.4251908788111709, "grad_norm": 0.4144454598426819, "learning_rate": 6.621978866496181e-05, "loss": 2.8042, "step": 5186 }, { "epoch": 0.4253548552395593, "grad_norm": 0.42282989621162415, "learning_rate": 6.619413943707642e-05, "loss": 2.8102, "step": 5188 }, { "epoch": 0.42551883166794774, "grad_norm": 0.3988903760910034, "learning_rate": 6.616848544734243e-05, "loss": 2.8422, "step": 5190 }, { "epoch": 0.42568280809633613, "grad_norm": 0.4397544264793396, "learning_rate": 6.614282670330327e-05, "loss": 2.8699, "step": 5192 }, { "epoch": 0.4258467845247246, "grad_norm": 0.4363107681274414, "learning_rate": 6.611716321250387e-05, "loss": 2.8547, "step": 5194 }, { "epoch": 0.42601076095311297, "grad_norm": 0.4343310594558716, "learning_rate": 6.609149498249052e-05, "loss": 2.7935, "step": 5196 }, { "epoch": 0.4261747373815014, "grad_norm": 0.40959495306015015, "learning_rate": 6.606582202081089e-05, "loss": 2.8412, "step": 5198 }, { "epoch": 0.4263387138098898, "grad_norm": 0.40372273325920105, "learning_rate": 6.604014433501404e-05, "loss": 2.8454, "step": 5200 }, { "epoch": 0.42650269023827825, "grad_norm": 0.46662813425064087, "learning_rate": 6.601446193265048e-05, "loss": 2.8771, "step": 5202 }, { "epoch": 0.4266666666666667, "grad_norm": 0.42457115650177, "learning_rate": 6.598877482127201e-05, "loss": 2.8032, "step": 5204 }, { "epoch": 0.4268306430950551, "grad_norm": 0.5101222991943359, "learning_rate": 6.596308300843188e-05, "loss": 2.8584, "step": 5206 }, { "epoch": 0.42699461952344353, "grad_norm": 0.40872541069984436, "learning_rate": 6.593738650168473e-05, "loss": 2.9095, "step": 5208 }, { "epoch": 0.4271585959518319, "grad_norm": 0.46287456154823303, "learning_rate": 6.591168530858653e-05, "loss": 2.84, "step": 5210 }, { "epoch": 0.42732257238022037, "grad_norm": 0.4474398195743561, "learning_rate": 6.588597943669465e-05, "loss": 2.8481, "step": 5212 }, { "epoch": 0.42748654880860876, "grad_norm": 0.444132536649704, "learning_rate": 6.586026889356789e-05, "loss": 2.8654, "step": 5214 }, { "epoch": 0.4276505252369972, "grad_norm": 0.4312966465950012, "learning_rate": 6.583455368676632e-05, "loss": 2.8346, "step": 5216 }, { "epoch": 0.4278145016653856, "grad_norm": 0.43802088499069214, "learning_rate": 6.580883382385148e-05, "loss": 2.8654, "step": 5218 }, { "epoch": 0.42797847809377404, "grad_norm": 0.47087231278419495, "learning_rate": 6.578310931238619e-05, "loss": 2.8939, "step": 5220 }, { "epoch": 0.42814245452216243, "grad_norm": 0.4582447111606598, "learning_rate": 6.575738015993473e-05, "loss": 2.8367, "step": 5222 }, { "epoch": 0.4283064309505509, "grad_norm": 0.4460625648498535, "learning_rate": 6.573164637406264e-05, "loss": 2.7906, "step": 5224 }, { "epoch": 0.42847040737893927, "grad_norm": 0.4364650547504425, "learning_rate": 6.570590796233693e-05, "loss": 2.8239, "step": 5226 }, { "epoch": 0.4286343838073277, "grad_norm": 0.4625886082649231, "learning_rate": 6.568016493232589e-05, "loss": 2.8592, "step": 5228 }, { "epoch": 0.4287983602357161, "grad_norm": 0.4330351948738098, "learning_rate": 6.565441729159923e-05, "loss": 2.8348, "step": 5230 }, { "epoch": 0.42896233666410455, "grad_norm": 0.42922812700271606, "learning_rate": 6.562866504772795e-05, "loss": 2.8507, "step": 5232 }, { "epoch": 0.42912631309249294, "grad_norm": 0.4726974070072174, "learning_rate": 6.560290820828443e-05, "loss": 2.8718, "step": 5234 }, { "epoch": 0.4292902895208814, "grad_norm": 0.42944204807281494, "learning_rate": 6.557714678084243e-05, "loss": 2.7942, "step": 5236 }, { "epoch": 0.4294542659492698, "grad_norm": 0.41010624170303345, "learning_rate": 6.555138077297707e-05, "loss": 2.8685, "step": 5238 }, { "epoch": 0.4296182423776582, "grad_norm": 0.4391094148159027, "learning_rate": 6.552561019226471e-05, "loss": 2.844, "step": 5240 }, { "epoch": 0.4297822188060466, "grad_norm": 0.4501963257789612, "learning_rate": 6.549983504628318e-05, "loss": 2.7881, "step": 5242 }, { "epoch": 0.42994619523443506, "grad_norm": 0.4697725176811218, "learning_rate": 6.54740553426116e-05, "loss": 2.8676, "step": 5244 }, { "epoch": 0.43011017166282345, "grad_norm": 0.46249881386756897, "learning_rate": 6.544827108883041e-05, "loss": 2.8892, "step": 5246 }, { "epoch": 0.4302741480912119, "grad_norm": 0.414722740650177, "learning_rate": 6.542248229252139e-05, "loss": 2.8313, "step": 5248 }, { "epoch": 0.4304381245196003, "grad_norm": 0.43396633863449097, "learning_rate": 6.539668896126774e-05, "loss": 2.8321, "step": 5250 }, { "epoch": 0.43060210094798873, "grad_norm": 0.44429492950439453, "learning_rate": 6.537089110265387e-05, "loss": 2.8523, "step": 5252 }, { "epoch": 0.4307660773763771, "grad_norm": 0.46500155329704285, "learning_rate": 6.53450887242656e-05, "loss": 2.7915, "step": 5254 }, { "epoch": 0.43093005380476557, "grad_norm": 0.5475171804428101, "learning_rate": 6.531928183369008e-05, "loss": 2.8314, "step": 5256 }, { "epoch": 0.43109403023315396, "grad_norm": 0.5729891657829285, "learning_rate": 6.529347043851573e-05, "loss": 2.8863, "step": 5258 }, { "epoch": 0.4312580066615424, "grad_norm": 0.4980946481227875, "learning_rate": 6.526765454633235e-05, "loss": 2.8636, "step": 5260 }, { "epoch": 0.43142198308993085, "grad_norm": 0.4520968198776245, "learning_rate": 6.524183416473103e-05, "loss": 2.8461, "step": 5262 }, { "epoch": 0.43158595951831924, "grad_norm": 0.38656288385391235, "learning_rate": 6.52160093013042e-05, "loss": 2.8186, "step": 5264 }, { "epoch": 0.4317499359467077, "grad_norm": 0.39334821701049805, "learning_rate": 6.519017996364562e-05, "loss": 2.8716, "step": 5266 }, { "epoch": 0.4319139123750961, "grad_norm": 0.41815948486328125, "learning_rate": 6.51643461593503e-05, "loss": 2.804, "step": 5268 }, { "epoch": 0.4320778888034845, "grad_norm": 0.48571309447288513, "learning_rate": 6.513850789601466e-05, "loss": 2.8229, "step": 5270 }, { "epoch": 0.4322418652318729, "grad_norm": 0.46701404452323914, "learning_rate": 6.511266518123633e-05, "loss": 2.8247, "step": 5272 }, { "epoch": 0.43240584166026136, "grad_norm": 0.46157705783843994, "learning_rate": 6.508681802261435e-05, "loss": 2.842, "step": 5274 }, { "epoch": 0.43256981808864975, "grad_norm": 0.4298838675022125, "learning_rate": 6.506096642774897e-05, "loss": 2.8514, "step": 5276 }, { "epoch": 0.4327337945170382, "grad_norm": 0.43288126587867737, "learning_rate": 6.503511040424182e-05, "loss": 2.8294, "step": 5278 }, { "epoch": 0.4328977709454266, "grad_norm": 0.43317630887031555, "learning_rate": 6.500924995969582e-05, "loss": 2.8552, "step": 5280 }, { "epoch": 0.43306174737381503, "grad_norm": 0.41484972834587097, "learning_rate": 6.498338510171514e-05, "loss": 2.7769, "step": 5282 }, { "epoch": 0.4332257238022034, "grad_norm": 0.41651368141174316, "learning_rate": 6.495751583790526e-05, "loss": 2.8219, "step": 5284 }, { "epoch": 0.43338970023059187, "grad_norm": 0.4511995315551758, "learning_rate": 6.493164217587303e-05, "loss": 2.7643, "step": 5286 }, { "epoch": 0.43355367665898026, "grad_norm": 0.4024675488471985, "learning_rate": 6.490576412322652e-05, "loss": 2.7651, "step": 5288 }, { "epoch": 0.4337176530873687, "grad_norm": 0.411886066198349, "learning_rate": 6.48798816875751e-05, "loss": 2.8321, "step": 5290 }, { "epoch": 0.4338816295157571, "grad_norm": 0.43117591738700867, "learning_rate": 6.485399487652945e-05, "loss": 2.8002, "step": 5292 }, { "epoch": 0.43404560594414554, "grad_norm": 0.42103347182273865, "learning_rate": 6.48281036977015e-05, "loss": 2.8772, "step": 5294 }, { "epoch": 0.43420958237253393, "grad_norm": 0.4188506305217743, "learning_rate": 6.480220815870453e-05, "loss": 2.8489, "step": 5296 }, { "epoch": 0.4343735588009224, "grad_norm": 0.42243778705596924, "learning_rate": 6.477630826715305e-05, "loss": 2.8632, "step": 5298 }, { "epoch": 0.43453753522931077, "grad_norm": 0.45281627774238586, "learning_rate": 6.475040403066284e-05, "loss": 2.8428, "step": 5300 }, { "epoch": 0.4347015116576992, "grad_norm": 0.43130266666412354, "learning_rate": 6.472449545685099e-05, "loss": 2.8076, "step": 5302 }, { "epoch": 0.4348654880860876, "grad_norm": 0.4273144006729126, "learning_rate": 6.469858255333588e-05, "loss": 2.7982, "step": 5304 }, { "epoch": 0.43502946451447605, "grad_norm": 0.43439432978630066, "learning_rate": 6.46726653277371e-05, "loss": 2.8237, "step": 5306 }, { "epoch": 0.43519344094286444, "grad_norm": 0.42046454548835754, "learning_rate": 6.464674378767558e-05, "loss": 2.8721, "step": 5308 }, { "epoch": 0.4353574173712529, "grad_norm": 0.4046113193035126, "learning_rate": 6.462081794077348e-05, "loss": 2.8418, "step": 5310 }, { "epoch": 0.4355213937996413, "grad_norm": 0.3967956006526947, "learning_rate": 6.459488779465424e-05, "loss": 2.8118, "step": 5312 }, { "epoch": 0.4356853702280297, "grad_norm": 0.3960517644882202, "learning_rate": 6.456895335694253e-05, "loss": 2.8551, "step": 5314 }, { "epoch": 0.4358493466564181, "grad_norm": 0.3832727372646332, "learning_rate": 6.454301463526434e-05, "loss": 2.8441, "step": 5316 }, { "epoch": 0.43601332308480656, "grad_norm": 0.3792496621608734, "learning_rate": 6.451707163724687e-05, "loss": 2.8411, "step": 5318 }, { "epoch": 0.436177299513195, "grad_norm": 0.39607372879981995, "learning_rate": 6.449112437051862e-05, "loss": 2.8356, "step": 5320 }, { "epoch": 0.4363412759415834, "grad_norm": 0.4213770031929016, "learning_rate": 6.446517284270932e-05, "loss": 2.8326, "step": 5322 }, { "epoch": 0.43650525236997184, "grad_norm": 0.43514689803123474, "learning_rate": 6.443921706144992e-05, "loss": 2.8283, "step": 5324 }, { "epoch": 0.43666922879836023, "grad_norm": 0.4403941333293915, "learning_rate": 6.441325703437269e-05, "loss": 2.826, "step": 5326 }, { "epoch": 0.4368332052267487, "grad_norm": 0.4238094091415405, "learning_rate": 6.438729276911112e-05, "loss": 2.8394, "step": 5328 }, { "epoch": 0.43699718165513707, "grad_norm": 0.4089421033859253, "learning_rate": 6.436132427329992e-05, "loss": 2.8331, "step": 5330 }, { "epoch": 0.4371611580835255, "grad_norm": 0.4226435720920563, "learning_rate": 6.433535155457508e-05, "loss": 2.8852, "step": 5332 }, { "epoch": 0.4373251345119139, "grad_norm": 0.4296805262565613, "learning_rate": 6.43093746205738e-05, "loss": 2.8884, "step": 5334 }, { "epoch": 0.43748911094030235, "grad_norm": 0.449306845664978, "learning_rate": 6.428339347893456e-05, "loss": 2.8108, "step": 5336 }, { "epoch": 0.43765308736869074, "grad_norm": 0.4620767831802368, "learning_rate": 6.425740813729704e-05, "loss": 2.8515, "step": 5338 }, { "epoch": 0.4378170637970792, "grad_norm": 0.41048452258110046, "learning_rate": 6.423141860330216e-05, "loss": 2.8679, "step": 5340 }, { "epoch": 0.4379810402254676, "grad_norm": 0.412546306848526, "learning_rate": 6.42054248845921e-05, "loss": 2.8475, "step": 5342 }, { "epoch": 0.438145016653856, "grad_norm": 0.42965230345726013, "learning_rate": 6.417942698881023e-05, "loss": 2.793, "step": 5344 }, { "epoch": 0.4383089930822444, "grad_norm": 0.4410039484500885, "learning_rate": 6.41534249236012e-05, "loss": 2.8357, "step": 5346 }, { "epoch": 0.43847296951063286, "grad_norm": 0.4114231467247009, "learning_rate": 6.412741869661082e-05, "loss": 2.8695, "step": 5348 }, { "epoch": 0.43863694593902125, "grad_norm": 0.4227801561355591, "learning_rate": 6.410140831548619e-05, "loss": 2.8338, "step": 5350 }, { "epoch": 0.4388009223674097, "grad_norm": 0.3871307373046875, "learning_rate": 6.40753937878756e-05, "loss": 2.8302, "step": 5352 }, { "epoch": 0.4389648987957981, "grad_norm": 0.3907979130744934, "learning_rate": 6.404937512142852e-05, "loss": 2.8185, "step": 5354 }, { "epoch": 0.43912887522418653, "grad_norm": 0.4412355422973633, "learning_rate": 6.402335232379576e-05, "loss": 2.8504, "step": 5356 }, { "epoch": 0.4392928516525749, "grad_norm": 0.450156033039093, "learning_rate": 6.399732540262916e-05, "loss": 2.8437, "step": 5358 }, { "epoch": 0.43945682808096337, "grad_norm": 0.465420663356781, "learning_rate": 6.397129436558196e-05, "loss": 2.836, "step": 5360 }, { "epoch": 0.43962080450935176, "grad_norm": 0.4449327886104584, "learning_rate": 6.394525922030848e-05, "loss": 2.7505, "step": 5362 }, { "epoch": 0.4397847809377402, "grad_norm": 0.41881614923477173, "learning_rate": 6.391921997446431e-05, "loss": 2.8624, "step": 5364 }, { "epoch": 0.4399487573661286, "grad_norm": 0.4254155158996582, "learning_rate": 6.38931766357062e-05, "loss": 2.8722, "step": 5366 }, { "epoch": 0.44011273379451704, "grad_norm": 0.40859904885292053, "learning_rate": 6.386712921169218e-05, "loss": 2.8448, "step": 5368 }, { "epoch": 0.44027671022290543, "grad_norm": 0.4507814645767212, "learning_rate": 6.384107771008141e-05, "loss": 2.8432, "step": 5370 }, { "epoch": 0.4404406866512939, "grad_norm": 0.4630066454410553, "learning_rate": 6.381502213853425e-05, "loss": 2.868, "step": 5372 }, { "epoch": 0.44060466307968227, "grad_norm": 0.43618708848953247, "learning_rate": 6.378896250471232e-05, "loss": 2.7984, "step": 5374 }, { "epoch": 0.4407686395080707, "grad_norm": 0.41137388348579407, "learning_rate": 6.37628988162784e-05, "loss": 2.8467, "step": 5376 }, { "epoch": 0.44093261593645916, "grad_norm": 0.4223901927471161, "learning_rate": 6.373683108089639e-05, "loss": 2.7786, "step": 5378 }, { "epoch": 0.44109659236484755, "grad_norm": 0.46738335490226746, "learning_rate": 6.371075930623151e-05, "loss": 2.8053, "step": 5380 }, { "epoch": 0.441260568793236, "grad_norm": 0.5173202753067017, "learning_rate": 6.368468349995009e-05, "loss": 2.852, "step": 5382 }, { "epoch": 0.4414245452216244, "grad_norm": 0.4798794090747833, "learning_rate": 6.365860366971965e-05, "loss": 2.8693, "step": 5384 }, { "epoch": 0.44158852165001283, "grad_norm": 0.4778260588645935, "learning_rate": 6.363251982320891e-05, "loss": 2.8151, "step": 5386 }, { "epoch": 0.4417524980784012, "grad_norm": 0.4863475561141968, "learning_rate": 6.360643196808774e-05, "loss": 2.8251, "step": 5388 }, { "epoch": 0.44191647450678967, "grad_norm": 0.4321562647819519, "learning_rate": 6.358034011202724e-05, "loss": 2.7902, "step": 5390 }, { "epoch": 0.44208045093517806, "grad_norm": 0.43023213744163513, "learning_rate": 6.355424426269965e-05, "loss": 2.8054, "step": 5392 }, { "epoch": 0.4422444273635665, "grad_norm": 0.4235995411872864, "learning_rate": 6.352814442777842e-05, "loss": 2.8321, "step": 5394 }, { "epoch": 0.4424084037919549, "grad_norm": 0.43662554025650024, "learning_rate": 6.350204061493808e-05, "loss": 2.8302, "step": 5396 }, { "epoch": 0.44257238022034334, "grad_norm": 0.4400186538696289, "learning_rate": 6.347593283185444e-05, "loss": 2.8407, "step": 5398 }, { "epoch": 0.44273635664873173, "grad_norm": 0.4381493926048279, "learning_rate": 6.344982108620445e-05, "loss": 2.8063, "step": 5400 }, { "epoch": 0.4429003330771202, "grad_norm": 0.41799694299697876, "learning_rate": 6.342370538566617e-05, "loss": 2.796, "step": 5402 }, { "epoch": 0.44306430950550857, "grad_norm": 0.4099697172641754, "learning_rate": 6.339758573791888e-05, "loss": 2.8589, "step": 5404 }, { "epoch": 0.443228285933897, "grad_norm": 0.42744383215904236, "learning_rate": 6.337146215064298e-05, "loss": 2.9065, "step": 5406 }, { "epoch": 0.4433922623622854, "grad_norm": 0.43363282084465027, "learning_rate": 6.334533463152008e-05, "loss": 2.846, "step": 5408 }, { "epoch": 0.44355623879067385, "grad_norm": 0.46505486965179443, "learning_rate": 6.33192031882329e-05, "loss": 2.8277, "step": 5410 }, { "epoch": 0.44372021521906224, "grad_norm": 0.4066617488861084, "learning_rate": 6.329306782846532e-05, "loss": 2.8879, "step": 5412 }, { "epoch": 0.4438841916474507, "grad_norm": 0.4458906054496765, "learning_rate": 6.326692855990239e-05, "loss": 2.861, "step": 5414 }, { "epoch": 0.4440481680758391, "grad_norm": 0.45811498165130615, "learning_rate": 6.32407853902303e-05, "loss": 2.8391, "step": 5416 }, { "epoch": 0.4442121445042275, "grad_norm": 0.42726707458496094, "learning_rate": 6.32146383271364e-05, "loss": 2.7785, "step": 5418 }, { "epoch": 0.4443761209326159, "grad_norm": 0.3991510570049286, "learning_rate": 6.318848737830916e-05, "loss": 2.8428, "step": 5420 }, { "epoch": 0.44454009736100436, "grad_norm": 0.4369128942489624, "learning_rate": 6.31623325514382e-05, "loss": 2.8468, "step": 5422 }, { "epoch": 0.44470407378939275, "grad_norm": 0.44972068071365356, "learning_rate": 6.31361738542143e-05, "loss": 2.8408, "step": 5424 }, { "epoch": 0.4448680502177812, "grad_norm": 0.4460034668445587, "learning_rate": 6.311001129432936e-05, "loss": 2.8325, "step": 5426 }, { "epoch": 0.4450320266461696, "grad_norm": 0.4248029589653015, "learning_rate": 6.308384487947639e-05, "loss": 2.8542, "step": 5428 }, { "epoch": 0.44519600307455803, "grad_norm": 0.42048823833465576, "learning_rate": 6.30576746173496e-05, "loss": 2.8299, "step": 5430 }, { "epoch": 0.4453599795029465, "grad_norm": 0.46963199973106384, "learning_rate": 6.30315005156443e-05, "loss": 2.8312, "step": 5432 }, { "epoch": 0.44552395593133487, "grad_norm": 0.44268596172332764, "learning_rate": 6.300532258205688e-05, "loss": 2.7943, "step": 5434 }, { "epoch": 0.4456879323597233, "grad_norm": 0.4529852867126465, "learning_rate": 6.297914082428491e-05, "loss": 2.801, "step": 5436 }, { "epoch": 0.4458519087881117, "grad_norm": 0.44438987970352173, "learning_rate": 6.295295525002713e-05, "loss": 2.8337, "step": 5438 }, { "epoch": 0.44601588521650015, "grad_norm": 0.48741820454597473, "learning_rate": 6.292676586698328e-05, "loss": 2.8151, "step": 5440 }, { "epoch": 0.44617986164488854, "grad_norm": 0.45658358931541443, "learning_rate": 6.29005726828543e-05, "loss": 2.848, "step": 5442 }, { "epoch": 0.446343838073277, "grad_norm": 0.4459148049354553, "learning_rate": 6.287437570534227e-05, "loss": 2.8042, "step": 5444 }, { "epoch": 0.4465078145016654, "grad_norm": 0.47021788358688354, "learning_rate": 6.28481749421503e-05, "loss": 2.8058, "step": 5446 }, { "epoch": 0.4466717909300538, "grad_norm": 0.4658445119857788, "learning_rate": 6.282197040098273e-05, "loss": 2.8652, "step": 5448 }, { "epoch": 0.4468357673584422, "grad_norm": 0.45639750361442566, "learning_rate": 6.279576208954487e-05, "loss": 2.8018, "step": 5450 }, { "epoch": 0.44699974378683066, "grad_norm": 0.45397526025772095, "learning_rate": 6.276955001554324e-05, "loss": 2.805, "step": 5452 }, { "epoch": 0.44716372021521905, "grad_norm": 0.4116973578929901, "learning_rate": 6.274333418668545e-05, "loss": 2.7981, "step": 5454 }, { "epoch": 0.4473276966436075, "grad_norm": 0.41127046942710876, "learning_rate": 6.27171146106802e-05, "loss": 2.8525, "step": 5456 }, { "epoch": 0.4474916730719959, "grad_norm": 0.4021296203136444, "learning_rate": 6.269089129523729e-05, "loss": 2.8321, "step": 5458 }, { "epoch": 0.44765564950038433, "grad_norm": 0.41415926814079285, "learning_rate": 6.266466424806762e-05, "loss": 2.8192, "step": 5460 }, { "epoch": 0.4478196259287727, "grad_norm": 0.4048265814781189, "learning_rate": 6.26384334768832e-05, "loss": 2.7977, "step": 5462 }, { "epoch": 0.44798360235716117, "grad_norm": 0.4210762083530426, "learning_rate": 6.261219898939712e-05, "loss": 2.8261, "step": 5464 }, { "epoch": 0.44814757878554956, "grad_norm": 0.43128538131713867, "learning_rate": 6.258596079332357e-05, "loss": 2.8386, "step": 5466 }, { "epoch": 0.448311555213938, "grad_norm": 0.42629754543304443, "learning_rate": 6.255971889637785e-05, "loss": 2.8455, "step": 5468 }, { "epoch": 0.4484755316423264, "grad_norm": 0.4861750304698944, "learning_rate": 6.25334733062763e-05, "loss": 2.8276, "step": 5470 }, { "epoch": 0.44863950807071484, "grad_norm": 0.4479605555534363, "learning_rate": 6.250722403073639e-05, "loss": 2.7939, "step": 5472 }, { "epoch": 0.44880348449910323, "grad_norm": 0.4483131170272827, "learning_rate": 6.248097107747665e-05, "loss": 2.8228, "step": 5474 }, { "epoch": 0.4489674609274917, "grad_norm": 0.48584070801734924, "learning_rate": 6.245471445421669e-05, "loss": 2.8331, "step": 5476 }, { "epoch": 0.44913143735588007, "grad_norm": 0.47229447960853577, "learning_rate": 6.242845416867721e-05, "loss": 2.7656, "step": 5478 }, { "epoch": 0.4492954137842685, "grad_norm": 0.45715320110321045, "learning_rate": 6.240219022858003e-05, "loss": 2.7746, "step": 5480 }, { "epoch": 0.4494593902126569, "grad_norm": 0.40935462713241577, "learning_rate": 6.237592264164794e-05, "loss": 2.7756, "step": 5482 }, { "epoch": 0.44962336664104535, "grad_norm": 0.42745450139045715, "learning_rate": 6.234965141560488e-05, "loss": 2.8465, "step": 5484 }, { "epoch": 0.44978734306943374, "grad_norm": 0.42606422305107117, "learning_rate": 6.232337655817587e-05, "loss": 2.8248, "step": 5486 }, { "epoch": 0.4499513194978222, "grad_norm": 0.455293744802475, "learning_rate": 6.229709807708694e-05, "loss": 2.8302, "step": 5488 }, { "epoch": 0.45011529592621063, "grad_norm": 0.43568676710128784, "learning_rate": 6.227081598006523e-05, "loss": 2.8069, "step": 5490 }, { "epoch": 0.450279272354599, "grad_norm": 0.43974193930625916, "learning_rate": 6.224453027483891e-05, "loss": 2.8403, "step": 5492 }, { "epoch": 0.45044324878298747, "grad_norm": 0.42426374554634094, "learning_rate": 6.221824096913727e-05, "loss": 2.8064, "step": 5494 }, { "epoch": 0.45060722521137586, "grad_norm": 0.4468149244785309, "learning_rate": 6.219194807069057e-05, "loss": 2.8305, "step": 5496 }, { "epoch": 0.4507712016397643, "grad_norm": 0.4190889596939087, "learning_rate": 6.216565158723022e-05, "loss": 2.8208, "step": 5498 }, { "epoch": 0.4509351780681527, "grad_norm": 0.4246452748775482, "learning_rate": 6.21393515264886e-05, "loss": 2.8203, "step": 5500 }, { "epoch": 0.45109915449654114, "grad_norm": 0.45627468824386597, "learning_rate": 6.211304789619918e-05, "loss": 2.8507, "step": 5502 }, { "epoch": 0.45126313092492953, "grad_norm": 0.45805448293685913, "learning_rate": 6.208674070409653e-05, "loss": 2.8456, "step": 5504 }, { "epoch": 0.451427107353318, "grad_norm": 0.4606642425060272, "learning_rate": 6.206042995791617e-05, "loss": 2.8356, "step": 5506 }, { "epoch": 0.45159108378170637, "grad_norm": 0.5084508657455444, "learning_rate": 6.203411566539472e-05, "loss": 2.7843, "step": 5508 }, { "epoch": 0.4517550602100948, "grad_norm": 0.4745926260948181, "learning_rate": 6.200779783426986e-05, "loss": 2.8339, "step": 5510 }, { "epoch": 0.4519190366384832, "grad_norm": 0.4591147303581238, "learning_rate": 6.198147647228027e-05, "loss": 2.8131, "step": 5512 }, { "epoch": 0.45208301306687165, "grad_norm": 0.42851564288139343, "learning_rate": 6.195515158716567e-05, "loss": 2.8114, "step": 5514 }, { "epoch": 0.45224698949526004, "grad_norm": 0.42879414558410645, "learning_rate": 6.192882318666687e-05, "loss": 2.784, "step": 5516 }, { "epoch": 0.4524109659236485, "grad_norm": 0.4082982838153839, "learning_rate": 6.190249127852565e-05, "loss": 2.8368, "step": 5518 }, { "epoch": 0.4525749423520369, "grad_norm": 0.4085899889469147, "learning_rate": 6.187615587048483e-05, "loss": 2.8495, "step": 5520 }, { "epoch": 0.4527389187804253, "grad_norm": 0.43112874031066895, "learning_rate": 6.18498169702883e-05, "loss": 2.7851, "step": 5522 }, { "epoch": 0.4529028952088137, "grad_norm": 0.438242644071579, "learning_rate": 6.182347458568096e-05, "loss": 2.8398, "step": 5524 }, { "epoch": 0.45306687163720216, "grad_norm": 0.48834484815597534, "learning_rate": 6.179712872440869e-05, "loss": 2.8107, "step": 5526 }, { "epoch": 0.45323084806559055, "grad_norm": 0.5261332988739014, "learning_rate": 6.177077939421845e-05, "loss": 2.8842, "step": 5528 }, { "epoch": 0.453394824493979, "grad_norm": 0.4741848409175873, "learning_rate": 6.174442660285818e-05, "loss": 2.8348, "step": 5530 }, { "epoch": 0.4535588009223674, "grad_norm": 0.43609732389450073, "learning_rate": 6.171807035807689e-05, "loss": 2.7855, "step": 5532 }, { "epoch": 0.45372277735075583, "grad_norm": 0.4651409089565277, "learning_rate": 6.169171066762456e-05, "loss": 2.8238, "step": 5534 }, { "epoch": 0.4538867537791442, "grad_norm": 0.4825083017349243, "learning_rate": 6.166534753925218e-05, "loss": 2.8005, "step": 5536 }, { "epoch": 0.45405073020753267, "grad_norm": 0.4591177701950073, "learning_rate": 6.163898098071178e-05, "loss": 2.8426, "step": 5538 }, { "epoch": 0.45421470663592106, "grad_norm": 0.4679067134857178, "learning_rate": 6.161261099975638e-05, "loss": 2.8282, "step": 5540 }, { "epoch": 0.4543786830643095, "grad_norm": 0.4355182647705078, "learning_rate": 6.158623760414002e-05, "loss": 2.7863, "step": 5542 }, { "epoch": 0.4545426594926979, "grad_norm": 0.43111327290534973, "learning_rate": 6.155986080161771e-05, "loss": 2.8526, "step": 5544 }, { "epoch": 0.45470663592108634, "grad_norm": 0.4271221160888672, "learning_rate": 6.153348059994551e-05, "loss": 2.8234, "step": 5546 }, { "epoch": 0.4548706123494748, "grad_norm": 0.4791860580444336, "learning_rate": 6.150709700688045e-05, "loss": 2.8127, "step": 5548 }, { "epoch": 0.4550345887778632, "grad_norm": 0.4541807174682617, "learning_rate": 6.148071003018055e-05, "loss": 2.8017, "step": 5550 }, { "epoch": 0.4551985652062516, "grad_norm": 0.43343642354011536, "learning_rate": 6.145431967760487e-05, "loss": 2.8638, "step": 5552 }, { "epoch": 0.45536254163464, "grad_norm": 0.42656823992729187, "learning_rate": 6.142792595691342e-05, "loss": 2.7703, "step": 5554 }, { "epoch": 0.45552651806302846, "grad_norm": 0.4463464915752411, "learning_rate": 6.140152887586718e-05, "loss": 2.8374, "step": 5556 }, { "epoch": 0.45569049449141685, "grad_norm": 0.4074001610279083, "learning_rate": 6.137512844222818e-05, "loss": 2.8707, "step": 5558 }, { "epoch": 0.4558544709198053, "grad_norm": 0.43333056569099426, "learning_rate": 6.13487246637594e-05, "loss": 2.8052, "step": 5560 }, { "epoch": 0.4560184473481937, "grad_norm": 0.4388822317123413, "learning_rate": 6.132231754822482e-05, "loss": 2.8677, "step": 5562 }, { "epoch": 0.45618242377658214, "grad_norm": 0.4309976100921631, "learning_rate": 6.129590710338937e-05, "loss": 2.8462, "step": 5564 }, { "epoch": 0.4563464002049705, "grad_norm": 0.45867303013801575, "learning_rate": 6.1269493337019e-05, "loss": 2.8331, "step": 5566 }, { "epoch": 0.45651037663335897, "grad_norm": 0.4165544807910919, "learning_rate": 6.124307625688057e-05, "loss": 2.8549, "step": 5568 }, { "epoch": 0.45667435306174736, "grad_norm": 0.3921827971935272, "learning_rate": 6.121665587074203e-05, "loss": 2.7879, "step": 5570 }, { "epoch": 0.4568383294901358, "grad_norm": 0.4092395305633545, "learning_rate": 6.119023218637217e-05, "loss": 2.7812, "step": 5572 }, { "epoch": 0.4570023059185242, "grad_norm": 0.41083306074142456, "learning_rate": 6.116380521154083e-05, "loss": 2.8403, "step": 5574 }, { "epoch": 0.45716628234691264, "grad_norm": 0.4270792305469513, "learning_rate": 6.113737495401885e-05, "loss": 2.7678, "step": 5576 }, { "epoch": 0.45733025877530104, "grad_norm": 0.4373821020126343, "learning_rate": 6.11109414215779e-05, "loss": 2.8208, "step": 5578 }, { "epoch": 0.4574942352036895, "grad_norm": 0.4400310218334198, "learning_rate": 6.108450462199077e-05, "loss": 2.8128, "step": 5580 }, { "epoch": 0.45765821163207787, "grad_norm": 0.47952520847320557, "learning_rate": 6.10580645630311e-05, "loss": 2.8685, "step": 5582 }, { "epoch": 0.4578221880604663, "grad_norm": 0.4763694703578949, "learning_rate": 6.1031621252473536e-05, "loss": 2.7871, "step": 5584 }, { "epoch": 0.4579861644888547, "grad_norm": 0.4709533154964447, "learning_rate": 6.100517469809368e-05, "loss": 2.7995, "step": 5586 }, { "epoch": 0.45815014091724315, "grad_norm": 0.4494019150733948, "learning_rate": 6.097872490766807e-05, "loss": 2.7969, "step": 5588 }, { "epoch": 0.45831411734563154, "grad_norm": 0.512210488319397, "learning_rate": 6.0952271888974214e-05, "loss": 2.8279, "step": 5590 }, { "epoch": 0.45847809377402, "grad_norm": 0.4916641116142273, "learning_rate": 6.092581564979053e-05, "loss": 2.8165, "step": 5592 }, { "epoch": 0.4586420702024084, "grad_norm": 0.4593188762664795, "learning_rate": 6.089935619789646e-05, "loss": 2.8759, "step": 5594 }, { "epoch": 0.4588060466307968, "grad_norm": 0.4758460223674774, "learning_rate": 6.087289354107229e-05, "loss": 2.7869, "step": 5596 }, { "epoch": 0.4589700230591852, "grad_norm": 0.4455423653125763, "learning_rate": 6.084642768709935e-05, "loss": 2.7912, "step": 5598 }, { "epoch": 0.45913399948757366, "grad_norm": 0.46677160263061523, "learning_rate": 6.0819958643759855e-05, "loss": 2.805, "step": 5600 }, { "epoch": 0.45929797591596205, "grad_norm": 0.44516557455062866, "learning_rate": 6.079348641883693e-05, "loss": 2.8097, "step": 5602 }, { "epoch": 0.4594619523443505, "grad_norm": 0.43633347749710083, "learning_rate": 6.076701102011471e-05, "loss": 2.8249, "step": 5604 }, { "epoch": 0.45962592877273895, "grad_norm": 0.39614197611808777, "learning_rate": 6.0740532455378194e-05, "loss": 2.7605, "step": 5606 }, { "epoch": 0.45978990520112734, "grad_norm": 0.4021206498146057, "learning_rate": 6.0714050732413376e-05, "loss": 2.8536, "step": 5608 }, { "epoch": 0.4599538816295158, "grad_norm": 0.4307616651058197, "learning_rate": 6.06875658590071e-05, "loss": 2.8353, "step": 5610 }, { "epoch": 0.46011785805790417, "grad_norm": 0.4669673442840576, "learning_rate": 6.066107784294723e-05, "loss": 2.8005, "step": 5612 }, { "epoch": 0.4602818344862926, "grad_norm": 0.4729499816894531, "learning_rate": 6.0634586692022454e-05, "loss": 2.846, "step": 5614 }, { "epoch": 0.460445810914681, "grad_norm": 0.486903578042984, "learning_rate": 6.0608092414022466e-05, "loss": 2.8256, "step": 5616 }, { "epoch": 0.46060978734306945, "grad_norm": 0.4671494960784912, "learning_rate": 6.058159501673785e-05, "loss": 2.7911, "step": 5618 }, { "epoch": 0.46077376377145784, "grad_norm": 0.49648165702819824, "learning_rate": 6.055509450796008e-05, "loss": 2.783, "step": 5620 }, { "epoch": 0.4609377401998463, "grad_norm": 0.5165703296661377, "learning_rate": 6.052859089548157e-05, "loss": 2.8046, "step": 5622 }, { "epoch": 0.4611017166282347, "grad_norm": 0.4854130148887634, "learning_rate": 6.0502084187095674e-05, "loss": 2.825, "step": 5624 }, { "epoch": 0.4612656930566231, "grad_norm": 0.5459082722663879, "learning_rate": 6.04755743905966e-05, "loss": 2.8237, "step": 5626 }, { "epoch": 0.4614296694850115, "grad_norm": 0.47205623984336853, "learning_rate": 6.0449061513779507e-05, "loss": 2.8149, "step": 5628 }, { "epoch": 0.46159364591339996, "grad_norm": 0.45652610063552856, "learning_rate": 6.0422545564440424e-05, "loss": 2.8339, "step": 5630 }, { "epoch": 0.46175762234178835, "grad_norm": 0.47060245275497437, "learning_rate": 6.039602655037634e-05, "loss": 2.7832, "step": 5632 }, { "epoch": 0.4619215987701768, "grad_norm": 0.437139630317688, "learning_rate": 6.0369504479385055e-05, "loss": 2.7565, "step": 5634 }, { "epoch": 0.4620855751985652, "grad_norm": 0.4424857497215271, "learning_rate": 6.034297935926537e-05, "loss": 2.7699, "step": 5636 }, { "epoch": 0.46224955162695364, "grad_norm": 0.41876840591430664, "learning_rate": 6.0316451197816905e-05, "loss": 2.7735, "step": 5638 }, { "epoch": 0.462413528055342, "grad_norm": 0.608383059501648, "learning_rate": 6.028992000284022e-05, "loss": 2.8363, "step": 5640 }, { "epoch": 0.4625775044837305, "grad_norm": 0.43988195061683655, "learning_rate": 6.026338578213675e-05, "loss": 2.7752, "step": 5642 }, { "epoch": 0.46274148091211886, "grad_norm": 0.39045771956443787, "learning_rate": 6.0236848543508804e-05, "loss": 2.7811, "step": 5644 }, { "epoch": 0.4629054573405073, "grad_norm": 0.4120277464389801, "learning_rate": 6.021030829475961e-05, "loss": 2.8247, "step": 5646 }, { "epoch": 0.4630694337688957, "grad_norm": 0.4452279508113861, "learning_rate": 6.018376504369326e-05, "loss": 2.8105, "step": 5648 }, { "epoch": 0.46323341019728415, "grad_norm": 0.43291357159614563, "learning_rate": 6.015721879811473e-05, "loss": 2.8537, "step": 5650 }, { "epoch": 0.46339738662567254, "grad_norm": 0.44539210200309753, "learning_rate": 6.01306695658299e-05, "loss": 2.7894, "step": 5652 }, { "epoch": 0.463561363054061, "grad_norm": 0.4763514995574951, "learning_rate": 6.01041173546455e-05, "loss": 2.8683, "step": 5654 }, { "epoch": 0.4637253394824494, "grad_norm": 0.4426315426826477, "learning_rate": 6.0077562172369136e-05, "loss": 2.7684, "step": 5656 }, { "epoch": 0.4638893159108378, "grad_norm": 0.43244004249572754, "learning_rate": 6.00510040268093e-05, "loss": 2.7787, "step": 5658 }, { "epoch": 0.4640532923392262, "grad_norm": 0.4351418614387512, "learning_rate": 6.002444292577536e-05, "loss": 2.8063, "step": 5660 }, { "epoch": 0.46421726876761465, "grad_norm": 0.4126474857330322, "learning_rate": 5.999787887707753e-05, "loss": 2.8737, "step": 5662 }, { "epoch": 0.4643812451960031, "grad_norm": 0.4187913239002228, "learning_rate": 5.997131188852691e-05, "loss": 2.8078, "step": 5664 }, { "epoch": 0.4645452216243915, "grad_norm": 0.4025074541568756, "learning_rate": 5.994474196793549e-05, "loss": 2.8085, "step": 5666 }, { "epoch": 0.46470919805277994, "grad_norm": 0.4166587293148041, "learning_rate": 5.991816912311606e-05, "loss": 2.7544, "step": 5668 }, { "epoch": 0.4648731744811683, "grad_norm": 0.4639766216278076, "learning_rate": 5.9891593361882306e-05, "loss": 2.8213, "step": 5670 }, { "epoch": 0.4650371509095568, "grad_norm": 0.4072076082229614, "learning_rate": 5.986501469204878e-05, "loss": 2.8197, "step": 5672 }, { "epoch": 0.46520112733794516, "grad_norm": 0.4271763563156128, "learning_rate": 5.983843312143087e-05, "loss": 2.8048, "step": 5674 }, { "epoch": 0.4653651037663336, "grad_norm": 0.4413955807685852, "learning_rate": 5.981184865784484e-05, "loss": 2.7811, "step": 5676 }, { "epoch": 0.465529080194722, "grad_norm": 0.49118563532829285, "learning_rate": 5.978526130910775e-05, "loss": 2.804, "step": 5678 }, { "epoch": 0.46569305662311045, "grad_norm": 0.48541152477264404, "learning_rate": 5.9758671083037596e-05, "loss": 2.8017, "step": 5680 }, { "epoch": 0.46585703305149884, "grad_norm": 0.43740737438201904, "learning_rate": 5.973207798745313e-05, "loss": 2.8365, "step": 5682 }, { "epoch": 0.4660210094798873, "grad_norm": 0.43544474244117737, "learning_rate": 5.970548203017402e-05, "loss": 2.8059, "step": 5684 }, { "epoch": 0.4661849859082757, "grad_norm": 0.4222916066646576, "learning_rate": 5.967888321902072e-05, "loss": 2.7608, "step": 5686 }, { "epoch": 0.4663489623366641, "grad_norm": 0.3928931653499603, "learning_rate": 5.965228156181457e-05, "loss": 2.8234, "step": 5688 }, { "epoch": 0.4665129387650525, "grad_norm": 0.4149591624736786, "learning_rate": 5.9625677066377714e-05, "loss": 2.8284, "step": 5690 }, { "epoch": 0.46667691519344096, "grad_norm": 0.4028678238391876, "learning_rate": 5.959906974053313e-05, "loss": 2.7957, "step": 5692 }, { "epoch": 0.46684089162182935, "grad_norm": 0.409410685300827, "learning_rate": 5.9572459592104654e-05, "loss": 2.7978, "step": 5694 }, { "epoch": 0.4670048680502178, "grad_norm": 0.44319820404052734, "learning_rate": 5.9545846628916957e-05, "loss": 2.7922, "step": 5696 }, { "epoch": 0.4671688444786062, "grad_norm": 0.4542018473148346, "learning_rate": 5.951923085879547e-05, "loss": 2.7683, "step": 5698 }, { "epoch": 0.46733282090699463, "grad_norm": 0.4291388988494873, "learning_rate": 5.949261228956654e-05, "loss": 2.7518, "step": 5700 }, { "epoch": 0.467496797335383, "grad_norm": 0.449232816696167, "learning_rate": 5.946599092905728e-05, "loss": 2.8088, "step": 5702 }, { "epoch": 0.46766077376377146, "grad_norm": 0.4024021625518799, "learning_rate": 5.943936678509563e-05, "loss": 2.7663, "step": 5704 }, { "epoch": 0.46782475019215986, "grad_norm": 0.41187432408332825, "learning_rate": 5.9412739865510356e-05, "loss": 2.7777, "step": 5706 }, { "epoch": 0.4679887266205483, "grad_norm": 0.42438969016075134, "learning_rate": 5.9386110178131074e-05, "loss": 2.8298, "step": 5708 }, { "epoch": 0.4681527030489367, "grad_norm": 0.4174380600452423, "learning_rate": 5.9359477730788135e-05, "loss": 2.758, "step": 5710 }, { "epoch": 0.46831667947732514, "grad_norm": 0.4812970757484436, "learning_rate": 5.933284253131277e-05, "loss": 2.808, "step": 5712 }, { "epoch": 0.46848065590571353, "grad_norm": 0.48399415612220764, "learning_rate": 5.930620458753701e-05, "loss": 2.7885, "step": 5714 }, { "epoch": 0.468644632334102, "grad_norm": 0.5062437653541565, "learning_rate": 5.927956390729364e-05, "loss": 2.8157, "step": 5716 }, { "epoch": 0.4688086087624904, "grad_norm": 0.46985939145088196, "learning_rate": 5.925292049841633e-05, "loss": 2.7961, "step": 5718 }, { "epoch": 0.4689725851908788, "grad_norm": 0.456906795501709, "learning_rate": 5.922627436873951e-05, "loss": 2.8435, "step": 5720 }, { "epoch": 0.46913656161926726, "grad_norm": 0.47446414828300476, "learning_rate": 5.919962552609838e-05, "loss": 2.8044, "step": 5722 }, { "epoch": 0.46930053804765565, "grad_norm": 0.4095275104045868, "learning_rate": 5.9172973978328994e-05, "loss": 2.731, "step": 5724 }, { "epoch": 0.4694645144760441, "grad_norm": 0.38491547107696533, "learning_rate": 5.914631973326819e-05, "loss": 2.7328, "step": 5726 }, { "epoch": 0.4696284909044325, "grad_norm": 0.40289902687072754, "learning_rate": 5.911966279875354e-05, "loss": 2.749, "step": 5728 }, { "epoch": 0.46979246733282093, "grad_norm": 0.42473292350769043, "learning_rate": 5.9093003182623474e-05, "loss": 2.8337, "step": 5730 }, { "epoch": 0.4699564437612093, "grad_norm": 0.4326395094394684, "learning_rate": 5.906634089271722e-05, "loss": 2.8016, "step": 5732 }, { "epoch": 0.47012042018959777, "grad_norm": 0.4024275541305542, "learning_rate": 5.903967593687472e-05, "loss": 2.7876, "step": 5734 }, { "epoch": 0.47028439661798616, "grad_norm": 0.44340696930885315, "learning_rate": 5.901300832293677e-05, "loss": 2.784, "step": 5736 }, { "epoch": 0.4704483730463746, "grad_norm": 0.4537631869316101, "learning_rate": 5.8986338058744905e-05, "loss": 2.8039, "step": 5738 }, { "epoch": 0.470612349474763, "grad_norm": 0.41661155223846436, "learning_rate": 5.895966515214145e-05, "loss": 2.8102, "step": 5740 }, { "epoch": 0.47077632590315144, "grad_norm": 0.4392305016517639, "learning_rate": 5.8932989610969516e-05, "loss": 2.8185, "step": 5742 }, { "epoch": 0.47094030233153983, "grad_norm": 0.46238356828689575, "learning_rate": 5.890631144307299e-05, "loss": 2.8385, "step": 5744 }, { "epoch": 0.4711042787599283, "grad_norm": 0.4088890254497528, "learning_rate": 5.887963065629652e-05, "loss": 2.836, "step": 5746 }, { "epoch": 0.47126825518831666, "grad_norm": 0.38375574350357056, "learning_rate": 5.8852947258485537e-05, "loss": 2.8207, "step": 5748 }, { "epoch": 0.4714322316167051, "grad_norm": 0.42347919940948486, "learning_rate": 5.88262612574862e-05, "loss": 2.7962, "step": 5750 }, { "epoch": 0.4715962080450935, "grad_norm": 0.4518338441848755, "learning_rate": 5.87995726611455e-05, "loss": 2.8055, "step": 5752 }, { "epoch": 0.47176018447348195, "grad_norm": 0.4793908894062042, "learning_rate": 5.877288147731114e-05, "loss": 2.841, "step": 5754 }, { "epoch": 0.47192416090187034, "grad_norm": 0.43137022852897644, "learning_rate": 5.87461877138316e-05, "loss": 2.8008, "step": 5756 }, { "epoch": 0.4720881373302588, "grad_norm": 0.40921229124069214, "learning_rate": 5.871949137855612e-05, "loss": 2.7741, "step": 5758 }, { "epoch": 0.4722521137586472, "grad_norm": 0.40755629539489746, "learning_rate": 5.869279247933469e-05, "loss": 2.8096, "step": 5760 }, { "epoch": 0.4724160901870356, "grad_norm": 0.3983427584171295, "learning_rate": 5.866609102401808e-05, "loss": 2.8176, "step": 5762 }, { "epoch": 0.472580066615424, "grad_norm": 0.3668491244316101, "learning_rate": 5.863938702045777e-05, "loss": 2.8583, "step": 5764 }, { "epoch": 0.47274404304381246, "grad_norm": 0.394542396068573, "learning_rate": 5.8612680476506e-05, "loss": 2.8073, "step": 5766 }, { "epoch": 0.47290801947220085, "grad_norm": 0.40968987345695496, "learning_rate": 5.8585971400015794e-05, "loss": 2.7961, "step": 5768 }, { "epoch": 0.4730719959005893, "grad_norm": 0.4503387212753296, "learning_rate": 5.855925979884088e-05, "loss": 2.7981, "step": 5770 }, { "epoch": 0.4732359723289777, "grad_norm": 0.4372981786727905, "learning_rate": 5.853254568083574e-05, "loss": 2.7914, "step": 5772 }, { "epoch": 0.47339994875736613, "grad_norm": 0.3964093029499054, "learning_rate": 5.85058290538556e-05, "loss": 2.7809, "step": 5774 }, { "epoch": 0.4735639251857546, "grad_norm": 0.43268218636512756, "learning_rate": 5.8479109925756405e-05, "loss": 2.8701, "step": 5776 }, { "epoch": 0.47372790161414297, "grad_norm": 0.3940499722957611, "learning_rate": 5.845238830439488e-05, "loss": 2.8071, "step": 5778 }, { "epoch": 0.4738918780425314, "grad_norm": 0.419542521238327, "learning_rate": 5.842566419762844e-05, "loss": 2.7558, "step": 5780 }, { "epoch": 0.4740558544709198, "grad_norm": 0.4414413571357727, "learning_rate": 5.839893761331524e-05, "loss": 2.8215, "step": 5782 }, { "epoch": 0.47421983089930825, "grad_norm": 0.44728782773017883, "learning_rate": 5.8372208559314177e-05, "loss": 2.7318, "step": 5784 }, { "epoch": 0.47438380732769664, "grad_norm": 0.43412888050079346, "learning_rate": 5.834547704348487e-05, "loss": 2.8587, "step": 5786 }, { "epoch": 0.4745477837560851, "grad_norm": 0.4343017637729645, "learning_rate": 5.831874307368766e-05, "loss": 2.7432, "step": 5788 }, { "epoch": 0.4747117601844735, "grad_norm": 0.4207670986652374, "learning_rate": 5.8292006657783595e-05, "loss": 2.8013, "step": 5790 }, { "epoch": 0.4748757366128619, "grad_norm": 0.3970639407634735, "learning_rate": 5.826526780363447e-05, "loss": 2.8342, "step": 5792 }, { "epoch": 0.4750397130412503, "grad_norm": 0.39141160249710083, "learning_rate": 5.823852651910278e-05, "loss": 2.8018, "step": 5794 }, { "epoch": 0.47520368946963876, "grad_norm": 0.4473559260368347, "learning_rate": 5.821178281205171e-05, "loss": 2.7463, "step": 5796 }, { "epoch": 0.47536766589802715, "grad_norm": 0.4763402044773102, "learning_rate": 5.8185036690345226e-05, "loss": 2.8499, "step": 5798 }, { "epoch": 0.4755316423264156, "grad_norm": 0.4592927396297455, "learning_rate": 5.815828816184793e-05, "loss": 2.792, "step": 5800 }, { "epoch": 0.475695618754804, "grad_norm": 0.4712817072868347, "learning_rate": 5.813153723442518e-05, "loss": 2.792, "step": 5802 }, { "epoch": 0.47585959518319243, "grad_norm": 0.4331757724285126, "learning_rate": 5.810478391594303e-05, "loss": 2.7926, "step": 5804 }, { "epoch": 0.4760235716115808, "grad_norm": 0.4528201222419739, "learning_rate": 5.80780282142682e-05, "loss": 2.7966, "step": 5806 }, { "epoch": 0.47618754803996927, "grad_norm": 0.46794208884239197, "learning_rate": 5.805127013726816e-05, "loss": 2.8304, "step": 5808 }, { "epoch": 0.47635152446835766, "grad_norm": 0.4737168252468109, "learning_rate": 5.802450969281109e-05, "loss": 2.8088, "step": 5810 }, { "epoch": 0.4765155008967461, "grad_norm": 0.4532864987850189, "learning_rate": 5.799774688876578e-05, "loss": 2.8326, "step": 5812 }, { "epoch": 0.4766794773251345, "grad_norm": 0.38387665152549744, "learning_rate": 5.7970981733001813e-05, "loss": 2.7919, "step": 5814 }, { "epoch": 0.47684345375352294, "grad_norm": 0.3972019851207733, "learning_rate": 5.7944214233389397e-05, "loss": 2.8352, "step": 5816 }, { "epoch": 0.47700743018191133, "grad_norm": 0.3993661105632782, "learning_rate": 5.791744439779946e-05, "loss": 2.8333, "step": 5818 }, { "epoch": 0.4771714066102998, "grad_norm": 0.42471030354499817, "learning_rate": 5.7890672234103604e-05, "loss": 2.8133, "step": 5820 }, { "epoch": 0.47733538303868817, "grad_norm": 0.46742257475852966, "learning_rate": 5.786389775017413e-05, "loss": 2.8554, "step": 5822 }, { "epoch": 0.4774993594670766, "grad_norm": 0.38363349437713623, "learning_rate": 5.7837120953884e-05, "loss": 2.8268, "step": 5824 }, { "epoch": 0.477663335895465, "grad_norm": 0.4112091660499573, "learning_rate": 5.7810341853106875e-05, "loss": 2.832, "step": 5826 }, { "epoch": 0.47782731232385345, "grad_norm": 0.3802869915962219, "learning_rate": 5.77835604557171e-05, "loss": 2.8291, "step": 5828 }, { "epoch": 0.47799128875224184, "grad_norm": 0.38989999890327454, "learning_rate": 5.775677676958965e-05, "loss": 2.8092, "step": 5830 }, { "epoch": 0.4781552651806303, "grad_norm": 0.3859577476978302, "learning_rate": 5.772999080260023e-05, "loss": 2.7836, "step": 5832 }, { "epoch": 0.47831924160901873, "grad_norm": 0.4060695767402649, "learning_rate": 5.7703202562625195e-05, "loss": 2.7615, "step": 5834 }, { "epoch": 0.4784832180374071, "grad_norm": 0.41608673334121704, "learning_rate": 5.767641205754153e-05, "loss": 2.8321, "step": 5836 }, { "epoch": 0.47864719446579557, "grad_norm": 0.4441404938697815, "learning_rate": 5.764961929522697e-05, "loss": 2.7576, "step": 5838 }, { "epoch": 0.47881117089418396, "grad_norm": 0.45295077562332153, "learning_rate": 5.762282428355983e-05, "loss": 2.8065, "step": 5840 }, { "epoch": 0.4789751473225724, "grad_norm": 0.4226892292499542, "learning_rate": 5.759602703041911e-05, "loss": 2.8324, "step": 5842 }, { "epoch": 0.4791391237509608, "grad_norm": 0.39947935938835144, "learning_rate": 5.75692275436845e-05, "loss": 2.8515, "step": 5844 }, { "epoch": 0.47930310017934924, "grad_norm": 0.4010763466358185, "learning_rate": 5.754242583123633e-05, "loss": 2.8421, "step": 5846 }, { "epoch": 0.47946707660773763, "grad_norm": 0.42324477434158325, "learning_rate": 5.751562190095557e-05, "loss": 2.8077, "step": 5848 }, { "epoch": 0.4796310530361261, "grad_norm": 0.4278055429458618, "learning_rate": 5.748881576072386e-05, "loss": 2.7678, "step": 5850 }, { "epoch": 0.47979502946451447, "grad_norm": 0.46750038862228394, "learning_rate": 5.746200741842348e-05, "loss": 2.7893, "step": 5852 }, { "epoch": 0.4799590058929029, "grad_norm": 0.40809497237205505, "learning_rate": 5.743519688193735e-05, "loss": 2.7416, "step": 5854 }, { "epoch": 0.4801229823212913, "grad_norm": 0.4718024730682373, "learning_rate": 5.7408384159149055e-05, "loss": 2.796, "step": 5856 }, { "epoch": 0.48028695874967975, "grad_norm": 0.42716535925865173, "learning_rate": 5.738156925794281e-05, "loss": 2.8112, "step": 5858 }, { "epoch": 0.48045093517806814, "grad_norm": 0.4023424983024597, "learning_rate": 5.735475218620349e-05, "loss": 2.7993, "step": 5860 }, { "epoch": 0.4806149116064566, "grad_norm": 0.39570191502571106, "learning_rate": 5.7327932951816565e-05, "loss": 2.8071, "step": 5862 }, { "epoch": 0.480778888034845, "grad_norm": 0.4263247549533844, "learning_rate": 5.730111156266819e-05, "loss": 2.7752, "step": 5864 }, { "epoch": 0.4809428644632334, "grad_norm": 0.4379764795303345, "learning_rate": 5.7274288026645104e-05, "loss": 2.7412, "step": 5866 }, { "epoch": 0.4811068408916218, "grad_norm": 0.4216366112232208, "learning_rate": 5.724746235163473e-05, "loss": 2.7588, "step": 5868 }, { "epoch": 0.48127081732001026, "grad_norm": 0.4250224232673645, "learning_rate": 5.722063454552509e-05, "loss": 2.8584, "step": 5870 }, { "epoch": 0.48143479374839865, "grad_norm": 0.44280561804771423, "learning_rate": 5.7193804616204826e-05, "loss": 2.8243, "step": 5872 }, { "epoch": 0.4815987701767871, "grad_norm": 0.44614461064338684, "learning_rate": 5.716697257156322e-05, "loss": 2.7236, "step": 5874 }, { "epoch": 0.4817627466051755, "grad_norm": 0.44351133704185486, "learning_rate": 5.71401384194902e-05, "loss": 2.7618, "step": 5876 }, { "epoch": 0.48192672303356393, "grad_norm": 0.450923353433609, "learning_rate": 5.711330216787624e-05, "loss": 2.7848, "step": 5878 }, { "epoch": 0.4820906994619523, "grad_norm": 0.45195823907852173, "learning_rate": 5.70864638246125e-05, "loss": 2.8052, "step": 5880 }, { "epoch": 0.48225467589034077, "grad_norm": 0.4296433627605438, "learning_rate": 5.7059623397590734e-05, "loss": 2.7922, "step": 5882 }, { "epoch": 0.48241865231872916, "grad_norm": 0.42876946926116943, "learning_rate": 5.703278089470331e-05, "loss": 2.8272, "step": 5884 }, { "epoch": 0.4825826287471176, "grad_norm": 0.42906731367111206, "learning_rate": 5.700593632384318e-05, "loss": 2.8105, "step": 5886 }, { "epoch": 0.482746605175506, "grad_norm": 0.4271446466445923, "learning_rate": 5.6979089692903954e-05, "loss": 2.8201, "step": 5888 }, { "epoch": 0.48291058160389444, "grad_norm": 0.43975505232810974, "learning_rate": 5.6952241009779794e-05, "loss": 2.8063, "step": 5890 }, { "epoch": 0.4830745580322829, "grad_norm": 0.4173251688480377, "learning_rate": 5.692539028236551e-05, "loss": 2.8241, "step": 5892 }, { "epoch": 0.4832385344606713, "grad_norm": 0.43732768297195435, "learning_rate": 5.68985375185565e-05, "loss": 2.798, "step": 5894 }, { "epoch": 0.4834025108890597, "grad_norm": 0.42669641971588135, "learning_rate": 5.687168272624874e-05, "loss": 2.8024, "step": 5896 }, { "epoch": 0.4835664873174481, "grad_norm": 0.4393509328365326, "learning_rate": 5.6844825913338825e-05, "loss": 2.8367, "step": 5898 }, { "epoch": 0.48373046374583656, "grad_norm": 0.417496919631958, "learning_rate": 5.681796708772396e-05, "loss": 2.8294, "step": 5900 }, { "epoch": 0.48389444017422495, "grad_norm": 0.4174562990665436, "learning_rate": 5.6791106257301866e-05, "loss": 2.7937, "step": 5902 }, { "epoch": 0.4840584166026134, "grad_norm": 0.4357423782348633, "learning_rate": 5.676424342997095e-05, "loss": 2.794, "step": 5904 }, { "epoch": 0.4842223930310018, "grad_norm": 0.4304860234260559, "learning_rate": 5.673737861363016e-05, "loss": 2.8276, "step": 5906 }, { "epoch": 0.48438636945939023, "grad_norm": 0.4502744674682617, "learning_rate": 5.6710511816179036e-05, "loss": 2.7533, "step": 5908 }, { "epoch": 0.4845503458877786, "grad_norm": 0.44293758273124695, "learning_rate": 5.6683643045517665e-05, "loss": 2.8093, "step": 5910 }, { "epoch": 0.48471432231616707, "grad_norm": 0.4096347987651825, "learning_rate": 5.665677230954678e-05, "loss": 2.8277, "step": 5912 }, { "epoch": 0.48487829874455546, "grad_norm": 0.41238588094711304, "learning_rate": 5.6629899616167635e-05, "loss": 2.766, "step": 5914 }, { "epoch": 0.4850422751729439, "grad_norm": 0.4263080656528473, "learning_rate": 5.6603024973282096e-05, "loss": 2.8173, "step": 5916 }, { "epoch": 0.4852062516013323, "grad_norm": 0.39349424839019775, "learning_rate": 5.6576148388792575e-05, "loss": 2.8281, "step": 5918 }, { "epoch": 0.48537022802972074, "grad_norm": 0.4356912076473236, "learning_rate": 5.654926987060209e-05, "loss": 2.7676, "step": 5920 }, { "epoch": 0.48553420445810913, "grad_norm": 0.4062795341014862, "learning_rate": 5.6522389426614184e-05, "loss": 2.7863, "step": 5922 }, { "epoch": 0.4856981808864976, "grad_norm": 0.4009894132614136, "learning_rate": 5.649550706473301e-05, "loss": 2.7931, "step": 5924 }, { "epoch": 0.48586215731488597, "grad_norm": 0.4238545000553131, "learning_rate": 5.646862279286325e-05, "loss": 2.7869, "step": 5926 }, { "epoch": 0.4860261337432744, "grad_norm": 0.4274156391620636, "learning_rate": 5.6441736618910146e-05, "loss": 2.8036, "step": 5928 }, { "epoch": 0.4861901101716628, "grad_norm": 0.4315156042575836, "learning_rate": 5.6414848550779554e-05, "loss": 2.7888, "step": 5930 }, { "epoch": 0.48635408660005125, "grad_norm": 0.4721137285232544, "learning_rate": 5.638795859637782e-05, "loss": 2.7765, "step": 5932 }, { "epoch": 0.48651806302843964, "grad_norm": 0.4357931315898895, "learning_rate": 5.6361066763611856e-05, "loss": 2.8073, "step": 5934 }, { "epoch": 0.4866820394568281, "grad_norm": 0.4593020975589752, "learning_rate": 5.633417306038917e-05, "loss": 2.8103, "step": 5936 }, { "epoch": 0.4868460158852165, "grad_norm": 0.48641642928123474, "learning_rate": 5.630727749461781e-05, "loss": 2.7753, "step": 5938 }, { "epoch": 0.4870099923136049, "grad_norm": 0.4623355567455292, "learning_rate": 5.6280380074206296e-05, "loss": 2.7595, "step": 5940 }, { "epoch": 0.4871739687419933, "grad_norm": 0.4284757375717163, "learning_rate": 5.6253480807063785e-05, "loss": 2.7887, "step": 5942 }, { "epoch": 0.48733794517038176, "grad_norm": 0.4367896318435669, "learning_rate": 5.6226579701099955e-05, "loss": 2.7582, "step": 5944 }, { "epoch": 0.48750192159877015, "grad_norm": 0.42119714617729187, "learning_rate": 5.6199676764225e-05, "loss": 2.7911, "step": 5946 }, { "epoch": 0.4876658980271586, "grad_norm": 0.4299163222312927, "learning_rate": 5.617277200434965e-05, "loss": 2.8189, "step": 5948 }, { "epoch": 0.48782987445554704, "grad_norm": 0.44932517409324646, "learning_rate": 5.614586542938521e-05, "loss": 2.7833, "step": 5950 }, { "epoch": 0.48799385088393543, "grad_norm": 0.43705087900161743, "learning_rate": 5.6118957047243505e-05, "loss": 2.7381, "step": 5952 }, { "epoch": 0.4881578273123239, "grad_norm": 0.44094035029411316, "learning_rate": 5.609204686583683e-05, "loss": 2.7726, "step": 5954 }, { "epoch": 0.48832180374071227, "grad_norm": 0.45215317606925964, "learning_rate": 5.606513489307812e-05, "loss": 2.7216, "step": 5956 }, { "epoch": 0.4884857801691007, "grad_norm": 0.4417240619659424, "learning_rate": 5.603822113688074e-05, "loss": 2.79, "step": 5958 }, { "epoch": 0.4886497565974891, "grad_norm": 0.4115074872970581, "learning_rate": 5.6011305605158614e-05, "loss": 2.7628, "step": 5960 }, { "epoch": 0.48881373302587755, "grad_norm": 0.3991395831108093, "learning_rate": 5.598438830582623e-05, "loss": 2.7127, "step": 5962 }, { "epoch": 0.48897770945426594, "grad_norm": 0.42140302062034607, "learning_rate": 5.595746924679851e-05, "loss": 2.8474, "step": 5964 }, { "epoch": 0.4891416858826544, "grad_norm": 0.41280636191368103, "learning_rate": 5.593054843599097e-05, "loss": 2.7677, "step": 5966 }, { "epoch": 0.4893056623110428, "grad_norm": 0.40295860171318054, "learning_rate": 5.590362588131961e-05, "loss": 2.835, "step": 5968 }, { "epoch": 0.4894696387394312, "grad_norm": 0.41554921865463257, "learning_rate": 5.587670159070093e-05, "loss": 2.7716, "step": 5970 }, { "epoch": 0.4896336151678196, "grad_norm": 0.4251762926578522, "learning_rate": 5.5849775572051955e-05, "loss": 2.7546, "step": 5972 }, { "epoch": 0.48979759159620806, "grad_norm": 0.44020459055900574, "learning_rate": 5.582284783329022e-05, "loss": 2.8029, "step": 5974 }, { "epoch": 0.48996156802459645, "grad_norm": 0.45208802819252014, "learning_rate": 5.579591838233379e-05, "loss": 2.8717, "step": 5976 }, { "epoch": 0.4901255444529849, "grad_norm": 0.46185302734375, "learning_rate": 5.5768987227101167e-05, "loss": 2.8632, "step": 5978 }, { "epoch": 0.4902895208813733, "grad_norm": 0.4524150788784027, "learning_rate": 5.574205437551141e-05, "loss": 2.8004, "step": 5980 }, { "epoch": 0.49045349730976173, "grad_norm": 0.5525970458984375, "learning_rate": 5.5715119835484056e-05, "loss": 2.743, "step": 5982 }, { "epoch": 0.4906174737381501, "grad_norm": 0.5090713500976562, "learning_rate": 5.568818361493915e-05, "loss": 2.7977, "step": 5984 }, { "epoch": 0.49078145016653857, "grad_norm": 0.43728598952293396, "learning_rate": 5.566124572179723e-05, "loss": 2.7899, "step": 5986 }, { "epoch": 0.49094542659492696, "grad_norm": 0.4495003819465637, "learning_rate": 5.56343061639793e-05, "loss": 2.7825, "step": 5988 }, { "epoch": 0.4911094030233154, "grad_norm": 0.4431800842285156, "learning_rate": 5.5607364949406895e-05, "loss": 2.7782, "step": 5990 }, { "epoch": 0.4912733794517038, "grad_norm": 0.44171142578125, "learning_rate": 5.558042208600201e-05, "loss": 2.8494, "step": 5992 }, { "epoch": 0.49143735588009224, "grad_norm": 0.43040671944618225, "learning_rate": 5.5553477581687117e-05, "loss": 2.8051, "step": 5994 }, { "epoch": 0.49160133230848063, "grad_norm": 0.4206514358520508, "learning_rate": 5.552653144438521e-05, "loss": 2.7955, "step": 5996 }, { "epoch": 0.4917653087368691, "grad_norm": 0.4470977783203125, "learning_rate": 5.549958368201971e-05, "loss": 2.7861, "step": 5998 }, { "epoch": 0.49192928516525747, "grad_norm": 0.4228839874267578, "learning_rate": 5.547263430251456e-05, "loss": 2.8133, "step": 6000 }, { "epoch": 0.4920932615936459, "grad_norm": 0.4443196654319763, "learning_rate": 5.544568331379415e-05, "loss": 2.8121, "step": 6002 }, { "epoch": 0.49225723802203436, "grad_norm": 0.408275842666626, "learning_rate": 5.541873072378337e-05, "loss": 2.7748, "step": 6004 }, { "epoch": 0.49242121445042275, "grad_norm": 0.3874877989292145, "learning_rate": 5.539177654040755e-05, "loss": 2.8108, "step": 6006 }, { "epoch": 0.4925851908788112, "grad_norm": 0.39103594422340393, "learning_rate": 5.536482077159251e-05, "loss": 2.8343, "step": 6008 }, { "epoch": 0.4927491673071996, "grad_norm": 0.39628902077674866, "learning_rate": 5.533786342526455e-05, "loss": 2.7583, "step": 6010 }, { "epoch": 0.49291314373558803, "grad_norm": 0.44031769037246704, "learning_rate": 5.531090450935037e-05, "loss": 2.7796, "step": 6012 }, { "epoch": 0.4930771201639764, "grad_norm": 0.4099719524383545, "learning_rate": 5.5283944031777215e-05, "loss": 2.7807, "step": 6014 }, { "epoch": 0.49324109659236487, "grad_norm": 0.4040038585662842, "learning_rate": 5.5256982000472746e-05, "loss": 2.794, "step": 6016 }, { "epoch": 0.49340507302075326, "grad_norm": 0.39991289377212524, "learning_rate": 5.523001842336507e-05, "loss": 2.7993, "step": 6018 }, { "epoch": 0.4935690494491417, "grad_norm": 0.39744338393211365, "learning_rate": 5.5203053308382766e-05, "loss": 2.7828, "step": 6020 }, { "epoch": 0.4937330258775301, "grad_norm": 0.39443936944007874, "learning_rate": 5.5176086663454884e-05, "loss": 2.7654, "step": 6022 }, { "epoch": 0.49389700230591854, "grad_norm": 0.4279281198978424, "learning_rate": 5.5149118496510865e-05, "loss": 2.79, "step": 6024 }, { "epoch": 0.49406097873430693, "grad_norm": 0.44886335730552673, "learning_rate": 5.5122148815480655e-05, "loss": 2.7787, "step": 6026 }, { "epoch": 0.4942249551626954, "grad_norm": 0.43961870670318604, "learning_rate": 5.5095177628294635e-05, "loss": 2.8273, "step": 6028 }, { "epoch": 0.49438893159108377, "grad_norm": 0.44873446226119995, "learning_rate": 5.506820494288361e-05, "loss": 2.8053, "step": 6030 }, { "epoch": 0.4945529080194722, "grad_norm": 0.49015265703201294, "learning_rate": 5.504123076717882e-05, "loss": 2.8459, "step": 6032 }, { "epoch": 0.4947168844478606, "grad_norm": 0.47962191700935364, "learning_rate": 5.501425510911199e-05, "loss": 2.7945, "step": 6034 }, { "epoch": 0.49488086087624905, "grad_norm": 0.49250590801239014, "learning_rate": 5.4987277976615224e-05, "loss": 2.7716, "step": 6036 }, { "epoch": 0.49504483730463744, "grad_norm": 0.4474705755710602, "learning_rate": 5.4960299377621085e-05, "loss": 2.7868, "step": 6038 }, { "epoch": 0.4952088137330259, "grad_norm": 0.4595922529697418, "learning_rate": 5.493331932006259e-05, "loss": 2.8321, "step": 6040 }, { "epoch": 0.4953727901614143, "grad_norm": 0.41899386048316956, "learning_rate": 5.490633781187313e-05, "loss": 2.7934, "step": 6042 }, { "epoch": 0.4955367665898027, "grad_norm": 0.40570735931396484, "learning_rate": 5.487935486098658e-05, "loss": 2.7884, "step": 6044 }, { "epoch": 0.4957007430181911, "grad_norm": 0.43426474928855896, "learning_rate": 5.48523704753372e-05, "loss": 2.8367, "step": 6046 }, { "epoch": 0.49586471944657956, "grad_norm": 0.4359624683856964, "learning_rate": 5.482538466285968e-05, "loss": 2.7899, "step": 6048 }, { "epoch": 0.49602869587496795, "grad_norm": 0.41395142674446106, "learning_rate": 5.4798397431489146e-05, "loss": 2.7557, "step": 6050 }, { "epoch": 0.4961926723033564, "grad_norm": 0.44301837682724, "learning_rate": 5.4771408789161126e-05, "loss": 2.8514, "step": 6052 }, { "epoch": 0.4963566487317448, "grad_norm": 0.4121994078159332, "learning_rate": 5.474441874381157e-05, "loss": 2.7861, "step": 6054 }, { "epoch": 0.49652062516013323, "grad_norm": 0.4379679262638092, "learning_rate": 5.471742730337682e-05, "loss": 2.7897, "step": 6056 }, { "epoch": 0.4966846015885216, "grad_norm": 0.43107476830482483, "learning_rate": 5.4690434475793674e-05, "loss": 2.7743, "step": 6058 }, { "epoch": 0.49684857801691007, "grad_norm": 0.4141952693462372, "learning_rate": 5.4663440268999285e-05, "loss": 2.7734, "step": 6060 }, { "epoch": 0.4970125544452985, "grad_norm": 0.4251377284526825, "learning_rate": 5.463644469093125e-05, "loss": 2.8095, "step": 6062 }, { "epoch": 0.4971765308736869, "grad_norm": 0.46977972984313965, "learning_rate": 5.4609447749527564e-05, "loss": 2.7804, "step": 6064 }, { "epoch": 0.49734050730207535, "grad_norm": 0.44376277923583984, "learning_rate": 5.458244945272659e-05, "loss": 2.7842, "step": 6066 }, { "epoch": 0.49750448373046374, "grad_norm": 0.4628044664859772, "learning_rate": 5.455544980846714e-05, "loss": 2.7729, "step": 6068 }, { "epoch": 0.4976684601588522, "grad_norm": 0.47324278950691223, "learning_rate": 5.452844882468837e-05, "loss": 2.7729, "step": 6070 }, { "epoch": 0.4978324365872406, "grad_norm": 0.4188961088657379, "learning_rate": 5.450144650932987e-05, "loss": 2.7722, "step": 6072 }, { "epoch": 0.497996413015629, "grad_norm": 0.4392087161540985, "learning_rate": 5.447444287033159e-05, "loss": 2.8274, "step": 6074 }, { "epoch": 0.4981603894440174, "grad_norm": 0.4453078508377075, "learning_rate": 5.4447437915633926e-05, "loss": 2.779, "step": 6076 }, { "epoch": 0.49832436587240586, "grad_norm": 0.4676656126976013, "learning_rate": 5.4420431653177575e-05, "loss": 2.775, "step": 6078 }, { "epoch": 0.49848834230079425, "grad_norm": 0.42664840817451477, "learning_rate": 5.439342409090369e-05, "loss": 2.7797, "step": 6080 }, { "epoch": 0.4986523187291827, "grad_norm": 0.43920576572418213, "learning_rate": 5.436641523675379e-05, "loss": 2.7808, "step": 6082 }, { "epoch": 0.4988162951575711, "grad_norm": 0.4442078769207001, "learning_rate": 5.433940509866975e-05, "loss": 2.8434, "step": 6084 }, { "epoch": 0.49898027158595953, "grad_norm": 0.41238483786582947, "learning_rate": 5.4312393684593833e-05, "loss": 2.8077, "step": 6086 }, { "epoch": 0.4991442480143479, "grad_norm": 0.4398910105228424, "learning_rate": 5.4285381002468696e-05, "loss": 2.7683, "step": 6088 }, { "epoch": 0.49930822444273637, "grad_norm": 0.46023282408714294, "learning_rate": 5.425836706023735e-05, "loss": 2.8109, "step": 6090 }, { "epoch": 0.49947220087112476, "grad_norm": 0.44774797558784485, "learning_rate": 5.4231351865843174e-05, "loss": 2.7536, "step": 6092 }, { "epoch": 0.4996361772995132, "grad_norm": 0.4440127909183502, "learning_rate": 5.4204335427229935e-05, "loss": 2.831, "step": 6094 }, { "epoch": 0.4998001537279016, "grad_norm": 0.39880669116973877, "learning_rate": 5.4177317752341725e-05, "loss": 2.8042, "step": 6096 }, { "epoch": 0.49996413015629004, "grad_norm": 0.38815608620643616, "learning_rate": 5.415029884912306e-05, "loss": 2.7251, "step": 6098 }, { "epoch": 0.5001281065846784, "grad_norm": 0.40447503328323364, "learning_rate": 5.412327872551879e-05, "loss": 2.7954, "step": 6100 }, { "epoch": 0.5002920830130668, "grad_norm": 0.39449626207351685, "learning_rate": 5.409625738947408e-05, "loss": 2.7724, "step": 6102 }, { "epoch": 0.5004560594414553, "grad_norm": 0.4029788374900818, "learning_rate": 5.406923484893452e-05, "loss": 2.8484, "step": 6104 }, { "epoch": 0.5006200358698437, "grad_norm": 0.39150452613830566, "learning_rate": 5.404221111184603e-05, "loss": 2.8264, "step": 6106 }, { "epoch": 0.5007840122982321, "grad_norm": 0.40811586380004883, "learning_rate": 5.4015186186154856e-05, "loss": 2.7428, "step": 6108 }, { "epoch": 0.5009479887266205, "grad_norm": 0.46742165088653564, "learning_rate": 5.39881600798076e-05, "loss": 2.8109, "step": 6110 }, { "epoch": 0.501111965155009, "grad_norm": 0.45936119556427, "learning_rate": 5.396113280075128e-05, "loss": 2.7595, "step": 6112 }, { "epoch": 0.5012759415833974, "grad_norm": 0.4600870609283447, "learning_rate": 5.3934104356933165e-05, "loss": 2.8304, "step": 6114 }, { "epoch": 0.5014399180117858, "grad_norm": 0.4206525683403015, "learning_rate": 5.39070747563009e-05, "loss": 2.8082, "step": 6116 }, { "epoch": 0.5016038944401742, "grad_norm": 0.4527590274810791, "learning_rate": 5.38800440068025e-05, "loss": 2.8014, "step": 6118 }, { "epoch": 0.5017678708685627, "grad_norm": 0.43309837579727173, "learning_rate": 5.385301211638626e-05, "loss": 2.8216, "step": 6120 }, { "epoch": 0.5019318472969511, "grad_norm": 0.44548389315605164, "learning_rate": 5.382597909300087e-05, "loss": 2.7575, "step": 6122 }, { "epoch": 0.5020958237253395, "grad_norm": 0.4065755605697632, "learning_rate": 5.379894494459533e-05, "loss": 2.7667, "step": 6124 }, { "epoch": 0.502259800153728, "grad_norm": 0.39666667580604553, "learning_rate": 5.377190967911895e-05, "loss": 2.792, "step": 6126 }, { "epoch": 0.5024237765821163, "grad_norm": 0.41114503145217896, "learning_rate": 5.374487330452139e-05, "loss": 2.7447, "step": 6128 }, { "epoch": 0.5025877530105047, "grad_norm": 0.42686963081359863, "learning_rate": 5.3717835828752646e-05, "loss": 2.7218, "step": 6130 }, { "epoch": 0.5027517294388931, "grad_norm": 0.4103451073169708, "learning_rate": 5.369079725976301e-05, "loss": 2.7768, "step": 6132 }, { "epoch": 0.5029157058672816, "grad_norm": 0.39283350110054016, "learning_rate": 5.366375760550313e-05, "loss": 2.7872, "step": 6134 }, { "epoch": 0.50307968229567, "grad_norm": 0.38679662346839905, "learning_rate": 5.363671687392393e-05, "loss": 2.7488, "step": 6136 }, { "epoch": 0.5032436587240584, "grad_norm": 0.4073856770992279, "learning_rate": 5.360967507297669e-05, "loss": 2.7554, "step": 6138 }, { "epoch": 0.5034076351524468, "grad_norm": 0.41266268491744995, "learning_rate": 5.358263221061296e-05, "loss": 2.7786, "step": 6140 }, { "epoch": 0.5035716115808353, "grad_norm": 0.3967779278755188, "learning_rate": 5.3555588294784664e-05, "loss": 2.7829, "step": 6142 }, { "epoch": 0.5037355880092237, "grad_norm": 0.41831472516059875, "learning_rate": 5.352854333344397e-05, "loss": 2.7711, "step": 6144 }, { "epoch": 0.5038995644376121, "grad_norm": 0.43159353733062744, "learning_rate": 5.3501497334543416e-05, "loss": 2.7562, "step": 6146 }, { "epoch": 0.5040635408660005, "grad_norm": 0.41569891571998596, "learning_rate": 5.34744503060358e-05, "loss": 2.7809, "step": 6148 }, { "epoch": 0.504227517294389, "grad_norm": 0.38171082735061646, "learning_rate": 5.344740225587423e-05, "loss": 2.7576, "step": 6150 }, { "epoch": 0.5043914937227774, "grad_norm": 0.4033462405204773, "learning_rate": 5.342035319201213e-05, "loss": 2.765, "step": 6152 }, { "epoch": 0.5045554701511658, "grad_norm": 0.4013696610927582, "learning_rate": 5.3393303122403224e-05, "loss": 2.7577, "step": 6154 }, { "epoch": 0.5047194465795541, "grad_norm": 0.40175867080688477, "learning_rate": 5.336625205500151e-05, "loss": 2.8194, "step": 6156 }, { "epoch": 0.5048834230079426, "grad_norm": 0.442961722612381, "learning_rate": 5.333919999776129e-05, "loss": 2.8206, "step": 6158 }, { "epoch": 0.505047399436331, "grad_norm": 0.4085559546947479, "learning_rate": 5.3312146958637175e-05, "loss": 2.7768, "step": 6160 }, { "epoch": 0.5052113758647194, "grad_norm": 0.43066325783729553, "learning_rate": 5.3285092945584005e-05, "loss": 2.798, "step": 6162 }, { "epoch": 0.5053753522931078, "grad_norm": 0.4265105724334717, "learning_rate": 5.3258037966556986e-05, "loss": 2.7422, "step": 6164 }, { "epoch": 0.5055393287214963, "grad_norm": 0.4203242361545563, "learning_rate": 5.3230982029511566e-05, "loss": 2.7914, "step": 6166 }, { "epoch": 0.5057033051498847, "grad_norm": 0.41456693410873413, "learning_rate": 5.3203925142403466e-05, "loss": 2.827, "step": 6168 }, { "epoch": 0.5058672815782731, "grad_norm": 0.4037761986255646, "learning_rate": 5.31768673131887e-05, "loss": 2.8208, "step": 6170 }, { "epoch": 0.5060312580066615, "grad_norm": 0.3898729383945465, "learning_rate": 5.31498085498236e-05, "loss": 2.7565, "step": 6172 }, { "epoch": 0.50619523443505, "grad_norm": 0.4107765257358551, "learning_rate": 5.312274886026467e-05, "loss": 2.8201, "step": 6174 }, { "epoch": 0.5063592108634384, "grad_norm": 0.4226357340812683, "learning_rate": 5.309568825246879e-05, "loss": 2.7327, "step": 6176 }, { "epoch": 0.5065231872918268, "grad_norm": 0.42700108885765076, "learning_rate": 5.3068626734393056e-05, "loss": 2.8095, "step": 6178 }, { "epoch": 0.5066871637202153, "grad_norm": 0.4003014862537384, "learning_rate": 5.304156431399484e-05, "loss": 2.7627, "step": 6180 }, { "epoch": 0.5068511401486037, "grad_norm": 0.41454121470451355, "learning_rate": 5.30145009992318e-05, "loss": 2.7588, "step": 6182 }, { "epoch": 0.507015116576992, "grad_norm": 0.4294338822364807, "learning_rate": 5.298743679806182e-05, "loss": 2.8088, "step": 6184 }, { "epoch": 0.5071790930053804, "grad_norm": 0.42749956250190735, "learning_rate": 5.2960371718443055e-05, "loss": 2.8191, "step": 6186 }, { "epoch": 0.507343069433769, "grad_norm": 0.43081730604171753, "learning_rate": 5.2933305768333955e-05, "loss": 2.7848, "step": 6188 }, { "epoch": 0.5075070458621573, "grad_norm": 0.4279966652393341, "learning_rate": 5.29062389556932e-05, "loss": 2.7276, "step": 6190 }, { "epoch": 0.5076710222905457, "grad_norm": 0.4057946503162384, "learning_rate": 5.287917128847969e-05, "loss": 2.7764, "step": 6192 }, { "epoch": 0.5078349987189341, "grad_norm": 0.41668733954429626, "learning_rate": 5.2852102774652634e-05, "loss": 2.7548, "step": 6194 }, { "epoch": 0.5079989751473226, "grad_norm": 0.4230756461620331, "learning_rate": 5.282503342217148e-05, "loss": 2.7572, "step": 6196 }, { "epoch": 0.508162951575711, "grad_norm": 0.42891067266464233, "learning_rate": 5.279796323899587e-05, "loss": 2.7678, "step": 6198 }, { "epoch": 0.5083269280040994, "grad_norm": 0.4005391299724579, "learning_rate": 5.277089223308576e-05, "loss": 2.792, "step": 6200 }, { "epoch": 0.5084909044324878, "grad_norm": 0.4525899291038513, "learning_rate": 5.274382041240129e-05, "loss": 2.8306, "step": 6202 }, { "epoch": 0.5086548808608763, "grad_norm": 0.4422246515750885, "learning_rate": 5.2716747784902875e-05, "loss": 2.7601, "step": 6204 }, { "epoch": 0.5088188572892647, "grad_norm": 0.4195386469364166, "learning_rate": 5.2689674358551175e-05, "loss": 2.7958, "step": 6206 }, { "epoch": 0.5089828337176531, "grad_norm": 0.4343207776546478, "learning_rate": 5.266260014130705e-05, "loss": 2.8017, "step": 6208 }, { "epoch": 0.5091468101460415, "grad_norm": 0.40766385197639465, "learning_rate": 5.26355251411316e-05, "loss": 2.7443, "step": 6210 }, { "epoch": 0.50931078657443, "grad_norm": 0.38682085275650024, "learning_rate": 5.2608449365986175e-05, "loss": 2.7368, "step": 6212 }, { "epoch": 0.5094747630028184, "grad_norm": 0.3705482482910156, "learning_rate": 5.2581372823832364e-05, "loss": 2.7829, "step": 6214 }, { "epoch": 0.5096387394312067, "grad_norm": 0.3838585317134857, "learning_rate": 5.255429552263194e-05, "loss": 2.8119, "step": 6216 }, { "epoch": 0.5098027158595951, "grad_norm": 0.4112929403781891, "learning_rate": 5.25272174703469e-05, "loss": 2.8034, "step": 6218 }, { "epoch": 0.5099666922879836, "grad_norm": 0.43207964301109314, "learning_rate": 5.250013867493953e-05, "loss": 2.7391, "step": 6220 }, { "epoch": 0.510130668716372, "grad_norm": 0.4452822804450989, "learning_rate": 5.2473059144372224e-05, "loss": 2.7944, "step": 6222 }, { "epoch": 0.5102946451447604, "grad_norm": 0.4192415177822113, "learning_rate": 5.2445978886607714e-05, "loss": 2.7224, "step": 6224 }, { "epoch": 0.5104586215731488, "grad_norm": 0.4334215819835663, "learning_rate": 5.241889790960887e-05, "loss": 2.7728, "step": 6226 }, { "epoch": 0.5106225980015373, "grad_norm": 0.44213369488716125, "learning_rate": 5.239181622133876e-05, "loss": 2.8489, "step": 6228 }, { "epoch": 0.5107865744299257, "grad_norm": 0.43869420886039734, "learning_rate": 5.2364733829760726e-05, "loss": 2.7913, "step": 6230 }, { "epoch": 0.5109505508583141, "grad_norm": 0.41994988918304443, "learning_rate": 5.2337650742838274e-05, "loss": 2.7579, "step": 6232 }, { "epoch": 0.5111145272867026, "grad_norm": 0.4251670837402344, "learning_rate": 5.231056696853509e-05, "loss": 2.7373, "step": 6234 }, { "epoch": 0.511278503715091, "grad_norm": 0.42417407035827637, "learning_rate": 5.228348251481514e-05, "loss": 2.8169, "step": 6236 }, { "epoch": 0.5114424801434794, "grad_norm": 0.4605846703052521, "learning_rate": 5.2256397389642517e-05, "loss": 2.7304, "step": 6238 }, { "epoch": 0.5116064565718678, "grad_norm": 0.41390493512153625, "learning_rate": 5.2229311600981546e-05, "loss": 2.771, "step": 6240 }, { "epoch": 0.5117704330002563, "grad_norm": 0.4106113016605377, "learning_rate": 5.220222515679674e-05, "loss": 2.7874, "step": 6242 }, { "epoch": 0.5119344094286447, "grad_norm": 0.40672963857650757, "learning_rate": 5.2175138065052806e-05, "loss": 2.7679, "step": 6244 }, { "epoch": 0.512098385857033, "grad_norm": 0.4203855097293854, "learning_rate": 5.214805033371463e-05, "loss": 2.7466, "step": 6246 }, { "epoch": 0.5122623622854214, "grad_norm": 0.3548525869846344, "learning_rate": 5.21209619707473e-05, "loss": 2.8231, "step": 6248 }, { "epoch": 0.5124263387138099, "grad_norm": 0.37747371196746826, "learning_rate": 5.20938729841161e-05, "loss": 2.8141, "step": 6250 }, { "epoch": 0.5125903151421983, "grad_norm": 0.41691645979881287, "learning_rate": 5.206678338178647e-05, "loss": 2.783, "step": 6252 }, { "epoch": 0.5127542915705867, "grad_norm": 0.4288187623023987, "learning_rate": 5.2039693171724034e-05, "loss": 2.7246, "step": 6254 }, { "epoch": 0.5129182679989751, "grad_norm": 0.41232407093048096, "learning_rate": 5.2012602361894626e-05, "loss": 2.7771, "step": 6256 }, { "epoch": 0.5130822444273636, "grad_norm": 0.39724135398864746, "learning_rate": 5.1985510960264216e-05, "loss": 2.7625, "step": 6258 }, { "epoch": 0.513246220855752, "grad_norm": 0.40316760540008545, "learning_rate": 5.195841897479897e-05, "loss": 2.79, "step": 6260 }, { "epoch": 0.5134101972841404, "grad_norm": 0.3974754214286804, "learning_rate": 5.193132641346524e-05, "loss": 2.7598, "step": 6262 }, { "epoch": 0.5135741737125288, "grad_norm": 0.4112754464149475, "learning_rate": 5.190423328422951e-05, "loss": 2.7954, "step": 6264 }, { "epoch": 0.5137381501409173, "grad_norm": 0.43297746777534485, "learning_rate": 5.1877139595058465e-05, "loss": 2.7357, "step": 6266 }, { "epoch": 0.5139021265693057, "grad_norm": 0.39716073870658875, "learning_rate": 5.185004535391893e-05, "loss": 2.7394, "step": 6268 }, { "epoch": 0.5140661029976941, "grad_norm": 0.3820559084415436, "learning_rate": 5.1822950568777906e-05, "loss": 2.7911, "step": 6270 }, { "epoch": 0.5142300794260825, "grad_norm": 0.4085080623626709, "learning_rate": 5.1795855247602564e-05, "loss": 2.7983, "step": 6272 }, { "epoch": 0.514394055854471, "grad_norm": 0.3924587368965149, "learning_rate": 5.176875939836019e-05, "loss": 2.7302, "step": 6274 }, { "epoch": 0.5145580322828593, "grad_norm": 0.4014110565185547, "learning_rate": 5.174166302901828e-05, "loss": 2.7932, "step": 6276 }, { "epoch": 0.5147220087112477, "grad_norm": 0.4239272475242615, "learning_rate": 5.1714566147544454e-05, "loss": 2.8041, "step": 6278 }, { "epoch": 0.5148859851396361, "grad_norm": 0.3936333954334259, "learning_rate": 5.168746876190649e-05, "loss": 2.8143, "step": 6280 }, { "epoch": 0.5150499615680246, "grad_norm": 0.42053771018981934, "learning_rate": 5.166037088007228e-05, "loss": 2.7328, "step": 6282 }, { "epoch": 0.515213937996413, "grad_norm": 0.4015074074268341, "learning_rate": 5.1633272510009924e-05, "loss": 2.7999, "step": 6284 }, { "epoch": 0.5153779144248014, "grad_norm": 0.4088298976421356, "learning_rate": 5.160617365968764e-05, "loss": 2.7773, "step": 6286 }, { "epoch": 0.5155418908531898, "grad_norm": 0.3864319920539856, "learning_rate": 5.157907433707375e-05, "loss": 2.7068, "step": 6288 }, { "epoch": 0.5157058672815783, "grad_norm": 0.41067034006118774, "learning_rate": 5.155197455013676e-05, "loss": 2.7409, "step": 6290 }, { "epoch": 0.5158698437099667, "grad_norm": 0.39901524782180786, "learning_rate": 5.1524874306845316e-05, "loss": 2.8163, "step": 6292 }, { "epoch": 0.5160338201383551, "grad_norm": 0.4010452330112457, "learning_rate": 5.1497773615168154e-05, "loss": 2.7706, "step": 6294 }, { "epoch": 0.5161977965667436, "grad_norm": 0.39766988158226013, "learning_rate": 5.14706724830742e-05, "loss": 2.8286, "step": 6296 }, { "epoch": 0.516361772995132, "grad_norm": 0.4324948489665985, "learning_rate": 5.1443570918532456e-05, "loss": 2.7614, "step": 6298 }, { "epoch": 0.5165257494235204, "grad_norm": 0.42732223868370056, "learning_rate": 5.141646892951206e-05, "loss": 2.8096, "step": 6300 }, { "epoch": 0.5166897258519088, "grad_norm": 0.3943778872489929, "learning_rate": 5.1389366523982306e-05, "loss": 2.7662, "step": 6302 }, { "epoch": 0.5168537022802973, "grad_norm": 0.4113095700740814, "learning_rate": 5.1362263709912604e-05, "loss": 2.7288, "step": 6304 }, { "epoch": 0.5170176787086856, "grad_norm": 0.4113110899925232, "learning_rate": 5.133516049527245e-05, "loss": 2.7923, "step": 6306 }, { "epoch": 0.517181655137074, "grad_norm": 0.42895427346229553, "learning_rate": 5.130805688803149e-05, "loss": 2.7665, "step": 6308 }, { "epoch": 0.5173456315654624, "grad_norm": 0.39833179116249084, "learning_rate": 5.1280952896159484e-05, "loss": 2.7699, "step": 6310 }, { "epoch": 0.5175096079938509, "grad_norm": 0.4006580412387848, "learning_rate": 5.125384852762628e-05, "loss": 2.787, "step": 6312 }, { "epoch": 0.5176735844222393, "grad_norm": 0.40851083397865295, "learning_rate": 5.122674379040186e-05, "loss": 2.7652, "step": 6314 }, { "epoch": 0.5178375608506277, "grad_norm": 0.3744722604751587, "learning_rate": 5.119963869245634e-05, "loss": 2.7533, "step": 6316 }, { "epoch": 0.5180015372790161, "grad_norm": 0.41793954372406006, "learning_rate": 5.1172533241759854e-05, "loss": 2.8169, "step": 6318 }, { "epoch": 0.5181655137074046, "grad_norm": 0.4526681900024414, "learning_rate": 5.114542744628275e-05, "loss": 2.7893, "step": 6320 }, { "epoch": 0.518329490135793, "grad_norm": 0.41063380241394043, "learning_rate": 5.111832131399539e-05, "loss": 2.7651, "step": 6322 }, { "epoch": 0.5184934665641814, "grad_norm": 0.38161396980285645, "learning_rate": 5.109121485286826e-05, "loss": 2.7477, "step": 6324 }, { "epoch": 0.5186574429925698, "grad_norm": 0.3881427049636841, "learning_rate": 5.106410807087197e-05, "loss": 2.7705, "step": 6326 }, { "epoch": 0.5188214194209583, "grad_norm": 0.39390257000923157, "learning_rate": 5.103700097597721e-05, "loss": 2.7051, "step": 6328 }, { "epoch": 0.5189853958493467, "grad_norm": 0.40716588497161865, "learning_rate": 5.1009893576154753e-05, "loss": 2.7446, "step": 6330 }, { "epoch": 0.519149372277735, "grad_norm": 0.3895430266857147, "learning_rate": 5.098278587937545e-05, "loss": 2.76, "step": 6332 }, { "epoch": 0.5193133487061234, "grad_norm": 0.4046901762485504, "learning_rate": 5.095567789361029e-05, "loss": 2.814, "step": 6334 }, { "epoch": 0.519477325134512, "grad_norm": 0.41053667664527893, "learning_rate": 5.092856962683028e-05, "loss": 2.7751, "step": 6336 }, { "epoch": 0.5196413015629003, "grad_norm": 0.4014565348625183, "learning_rate": 5.090146108700654e-05, "loss": 2.7611, "step": 6338 }, { "epoch": 0.5198052779912887, "grad_norm": 0.4204341471195221, "learning_rate": 5.087435228211032e-05, "loss": 2.7913, "step": 6340 }, { "epoch": 0.5199692544196771, "grad_norm": 0.4301351308822632, "learning_rate": 5.084724322011284e-05, "loss": 2.7799, "step": 6342 }, { "epoch": 0.5201332308480656, "grad_norm": 0.43886420130729675, "learning_rate": 5.082013390898549e-05, "loss": 2.7522, "step": 6344 }, { "epoch": 0.520297207276454, "grad_norm": 0.46510088443756104, "learning_rate": 5.07930243566997e-05, "loss": 2.8082, "step": 6346 }, { "epoch": 0.5204611837048424, "grad_norm": 0.4435691833496094, "learning_rate": 5.076591457122696e-05, "loss": 2.7263, "step": 6348 }, { "epoch": 0.5206251601332309, "grad_norm": 0.42445266246795654, "learning_rate": 5.073880456053882e-05, "loss": 2.7793, "step": 6350 }, { "epoch": 0.5207891365616193, "grad_norm": 0.3983137607574463, "learning_rate": 5.071169433260696e-05, "loss": 2.7799, "step": 6352 }, { "epoch": 0.5209531129900077, "grad_norm": 0.4298652708530426, "learning_rate": 5.0684583895403034e-05, "loss": 2.7705, "step": 6354 }, { "epoch": 0.5211170894183961, "grad_norm": 0.40018078684806824, "learning_rate": 5.065747325689884e-05, "loss": 2.7392, "step": 6356 }, { "epoch": 0.5212810658467846, "grad_norm": 0.3763446807861328, "learning_rate": 5.063036242506618e-05, "loss": 2.7786, "step": 6358 }, { "epoch": 0.521445042275173, "grad_norm": 0.4118928015232086, "learning_rate": 5.0603251407876915e-05, "loss": 2.7513, "step": 6360 }, { "epoch": 0.5216090187035614, "grad_norm": 0.45322826504707336, "learning_rate": 5.057614021330299e-05, "loss": 2.7825, "step": 6362 }, { "epoch": 0.5217729951319497, "grad_norm": 0.41894903779029846, "learning_rate": 5.0549028849316416e-05, "loss": 2.7983, "step": 6364 }, { "epoch": 0.5219369715603382, "grad_norm": 0.45578399300575256, "learning_rate": 5.052191732388919e-05, "loss": 2.7725, "step": 6366 }, { "epoch": 0.5221009479887266, "grad_norm": 0.4369242191314697, "learning_rate": 5.0494805644993394e-05, "loss": 2.7747, "step": 6368 }, { "epoch": 0.522264924417115, "grad_norm": 0.42365267872810364, "learning_rate": 5.046769382060116e-05, "loss": 2.7458, "step": 6370 }, { "epoch": 0.5224289008455034, "grad_norm": 0.43620315194129944, "learning_rate": 5.044058185868465e-05, "loss": 2.8222, "step": 6372 }, { "epoch": 0.5225928772738919, "grad_norm": 0.4382375478744507, "learning_rate": 5.0413469767216094e-05, "loss": 2.7856, "step": 6374 }, { "epoch": 0.5227568537022803, "grad_norm": 0.4498637616634369, "learning_rate": 5.0386357554167726e-05, "loss": 2.7856, "step": 6376 }, { "epoch": 0.5229208301306687, "grad_norm": 0.44776564836502075, "learning_rate": 5.035924522751182e-05, "loss": 2.7612, "step": 6378 }, { "epoch": 0.5230848065590571, "grad_norm": 0.4710599184036255, "learning_rate": 5.03321327952207e-05, "loss": 2.7498, "step": 6380 }, { "epoch": 0.5232487829874456, "grad_norm": 0.42744359374046326, "learning_rate": 5.030502026526671e-05, "loss": 2.7498, "step": 6382 }, { "epoch": 0.523412759415834, "grad_norm": 0.4643082320690155, "learning_rate": 5.0277907645622235e-05, "loss": 2.7945, "step": 6384 }, { "epoch": 0.5235767358442224, "grad_norm": 0.4620349109172821, "learning_rate": 5.025079494425966e-05, "loss": 2.8336, "step": 6386 }, { "epoch": 0.5237407122726108, "grad_norm": 0.4274936616420746, "learning_rate": 5.022368216915143e-05, "loss": 2.8171, "step": 6388 }, { "epoch": 0.5239046887009993, "grad_norm": 0.4141872823238373, "learning_rate": 5.019656932826999e-05, "loss": 2.7208, "step": 6390 }, { "epoch": 0.5240686651293877, "grad_norm": 0.4212776720523834, "learning_rate": 5.016945642958779e-05, "loss": 2.818, "step": 6392 }, { "epoch": 0.524232641557776, "grad_norm": 0.41134974360466003, "learning_rate": 5.0142343481077336e-05, "loss": 2.743, "step": 6394 }, { "epoch": 0.5243966179861644, "grad_norm": 0.40472692251205444, "learning_rate": 5.011523049071111e-05, "loss": 2.787, "step": 6396 }, { "epoch": 0.5245605944145529, "grad_norm": 0.42915216088294983, "learning_rate": 5.008811746646161e-05, "loss": 2.8417, "step": 6398 }, { "epoch": 0.5247245708429413, "grad_norm": 0.4095515310764313, "learning_rate": 5.0061004416301405e-05, "loss": 2.7547, "step": 6400 }, { "epoch": 0.5248885472713297, "grad_norm": 0.3783782720565796, "learning_rate": 5.003389134820298e-05, "loss": 2.717, "step": 6402 }, { "epoch": 0.5250525236997182, "grad_norm": 0.3920300602912903, "learning_rate": 5.0006778270138875e-05, "loss": 2.7286, "step": 6404 }, { "epoch": 0.5252165001281066, "grad_norm": 0.3948673605918884, "learning_rate": 4.997966519008165e-05, "loss": 2.8211, "step": 6406 }, { "epoch": 0.525380476556495, "grad_norm": 0.39030882716178894, "learning_rate": 4.995255211600383e-05, "loss": 2.7395, "step": 6408 }, { "epoch": 0.5255444529848834, "grad_norm": 0.383803129196167, "learning_rate": 4.9925439055877945e-05, "loss": 2.6929, "step": 6410 }, { "epoch": 0.5257084294132719, "grad_norm": 0.4085908830165863, "learning_rate": 4.989832601767653e-05, "loss": 2.7877, "step": 6412 }, { "epoch": 0.5258724058416603, "grad_norm": 0.361942321062088, "learning_rate": 4.9871213009372115e-05, "loss": 2.7773, "step": 6414 }, { "epoch": 0.5260363822700487, "grad_norm": 0.39183202385902405, "learning_rate": 4.9844100038937216e-05, "loss": 2.8114, "step": 6416 }, { "epoch": 0.5262003586984371, "grad_norm": 0.41694483160972595, "learning_rate": 4.98169871143443e-05, "loss": 2.7261, "step": 6418 }, { "epoch": 0.5263643351268256, "grad_norm": 0.4080658555030823, "learning_rate": 4.9789874243565935e-05, "loss": 2.7013, "step": 6420 }, { "epoch": 0.526528311555214, "grad_norm": 0.4062003791332245, "learning_rate": 4.976276143457454e-05, "loss": 2.7662, "step": 6422 }, { "epoch": 0.5266922879836023, "grad_norm": 0.4070029556751251, "learning_rate": 4.973564869534257e-05, "loss": 2.735, "step": 6424 }, { "epoch": 0.5268562644119907, "grad_norm": 0.397651731967926, "learning_rate": 4.97085360338425e-05, "loss": 2.6623, "step": 6426 }, { "epoch": 0.5270202408403792, "grad_norm": 0.41555312275886536, "learning_rate": 4.9681423458046725e-05, "loss": 2.7732, "step": 6428 }, { "epoch": 0.5271842172687676, "grad_norm": 0.3885171413421631, "learning_rate": 4.965431097592761e-05, "loss": 2.8271, "step": 6430 }, { "epoch": 0.527348193697156, "grad_norm": 0.37522533535957336, "learning_rate": 4.962719859545756e-05, "loss": 2.7542, "step": 6432 }, { "epoch": 0.5275121701255444, "grad_norm": 0.3871397376060486, "learning_rate": 4.96000863246089e-05, "loss": 2.7809, "step": 6434 }, { "epoch": 0.5276761465539329, "grad_norm": 0.42370837926864624, "learning_rate": 4.9572974171353895e-05, "loss": 2.766, "step": 6436 }, { "epoch": 0.5278401229823213, "grad_norm": 0.40288394689559937, "learning_rate": 4.954586214366484e-05, "loss": 2.7893, "step": 6438 }, { "epoch": 0.5280040994107097, "grad_norm": 0.3945050835609436, "learning_rate": 4.951875024951396e-05, "loss": 2.8174, "step": 6440 }, { "epoch": 0.5281680758390981, "grad_norm": 0.382445752620697, "learning_rate": 4.949163849687341e-05, "loss": 2.7378, "step": 6442 }, { "epoch": 0.5283320522674866, "grad_norm": 0.3933001756668091, "learning_rate": 4.946452689371539e-05, "loss": 2.7724, "step": 6444 }, { "epoch": 0.528496028695875, "grad_norm": 0.44155430793762207, "learning_rate": 4.943741544801198e-05, "loss": 2.7414, "step": 6446 }, { "epoch": 0.5286600051242634, "grad_norm": 0.41750699281692505, "learning_rate": 4.941030416773521e-05, "loss": 2.7565, "step": 6448 }, { "epoch": 0.5288239815526518, "grad_norm": 0.41295358538627625, "learning_rate": 4.9383193060857135e-05, "loss": 2.7785, "step": 6450 }, { "epoch": 0.5289879579810403, "grad_norm": 0.43917882442474365, "learning_rate": 4.9356082135349696e-05, "loss": 2.7086, "step": 6452 }, { "epoch": 0.5291519344094286, "grad_norm": 0.41664546728134155, "learning_rate": 4.932897139918478e-05, "loss": 2.7405, "step": 6454 }, { "epoch": 0.529315910837817, "grad_norm": 0.4118877649307251, "learning_rate": 4.930186086033424e-05, "loss": 2.7359, "step": 6456 }, { "epoch": 0.5294798872662054, "grad_norm": 0.4057008624076843, "learning_rate": 4.9274750526769886e-05, "loss": 2.7382, "step": 6458 }, { "epoch": 0.5296438636945939, "grad_norm": 0.39607709646224976, "learning_rate": 4.9247640406463426e-05, "loss": 2.7757, "step": 6460 }, { "epoch": 0.5298078401229823, "grad_norm": 0.3914690911769867, "learning_rate": 4.922053050738655e-05, "loss": 2.7573, "step": 6462 }, { "epoch": 0.5299718165513707, "grad_norm": 0.40739142894744873, "learning_rate": 4.919342083751084e-05, "loss": 2.7704, "step": 6464 }, { "epoch": 0.5301357929797592, "grad_norm": 0.43088585138320923, "learning_rate": 4.916631140480782e-05, "loss": 2.7606, "step": 6466 }, { "epoch": 0.5302997694081476, "grad_norm": 0.4455958604812622, "learning_rate": 4.913920221724899e-05, "loss": 2.7608, "step": 6468 }, { "epoch": 0.530463745836536, "grad_norm": 0.4500090777873993, "learning_rate": 4.911209328280573e-05, "loss": 2.7754, "step": 6470 }, { "epoch": 0.5306277222649244, "grad_norm": 0.43248435854911804, "learning_rate": 4.9084984609449324e-05, "loss": 2.8032, "step": 6472 }, { "epoch": 0.5307916986933129, "grad_norm": 0.443559467792511, "learning_rate": 4.9057876205151066e-05, "loss": 2.7396, "step": 6474 }, { "epoch": 0.5309556751217013, "grad_norm": 0.41736772656440735, "learning_rate": 4.90307680778821e-05, "loss": 2.7462, "step": 6476 }, { "epoch": 0.5311196515500897, "grad_norm": 0.4045504629611969, "learning_rate": 4.9003660235613494e-05, "loss": 2.7085, "step": 6478 }, { "epoch": 0.5312836279784781, "grad_norm": 0.4106195867061615, "learning_rate": 4.897655268631624e-05, "loss": 2.7858, "step": 6480 }, { "epoch": 0.5314476044068666, "grad_norm": 0.4442145526409149, "learning_rate": 4.894944543796129e-05, "loss": 2.7608, "step": 6482 }, { "epoch": 0.531611580835255, "grad_norm": 0.41611093282699585, "learning_rate": 4.892233849851941e-05, "loss": 2.8305, "step": 6484 }, { "epoch": 0.5317755572636433, "grad_norm": 0.39070653915405273, "learning_rate": 4.889523187596139e-05, "loss": 2.7433, "step": 6486 }, { "epoch": 0.5319395336920317, "grad_norm": 0.40593773126602173, "learning_rate": 4.8868125578257835e-05, "loss": 2.7716, "step": 6488 }, { "epoch": 0.5321035101204202, "grad_norm": 0.3786163032054901, "learning_rate": 4.884101961337927e-05, "loss": 2.7912, "step": 6490 }, { "epoch": 0.5322674865488086, "grad_norm": 0.37940514087677, "learning_rate": 4.881391398929619e-05, "loss": 2.8256, "step": 6492 }, { "epoch": 0.532431462977197, "grad_norm": 0.3631497323513031, "learning_rate": 4.8786808713978905e-05, "loss": 2.7599, "step": 6494 }, { "epoch": 0.5325954394055854, "grad_norm": 0.407393217086792, "learning_rate": 4.875970379539765e-05, "loss": 2.7604, "step": 6496 }, { "epoch": 0.5327594158339739, "grad_norm": 0.40873128175735474, "learning_rate": 4.87325992415226e-05, "loss": 2.7488, "step": 6498 }, { "epoch": 0.5329233922623623, "grad_norm": 0.37873297929763794, "learning_rate": 4.8705495060323765e-05, "loss": 2.705, "step": 6500 }, { "epoch": 0.5330873686907507, "grad_norm": 0.3927375078201294, "learning_rate": 4.867839125977105e-05, "loss": 2.7707, "step": 6502 }, { "epoch": 0.5332513451191391, "grad_norm": 0.3866179287433624, "learning_rate": 4.8651287847834285e-05, "loss": 2.8109, "step": 6504 }, { "epoch": 0.5334153215475276, "grad_norm": 0.42081525921821594, "learning_rate": 4.862418483248316e-05, "loss": 2.7731, "step": 6506 }, { "epoch": 0.533579297975916, "grad_norm": 0.4573631286621094, "learning_rate": 4.859708222168724e-05, "loss": 2.7307, "step": 6508 }, { "epoch": 0.5337432744043044, "grad_norm": 0.4063120186328888, "learning_rate": 4.856998002341601e-05, "loss": 2.7532, "step": 6510 }, { "epoch": 0.5339072508326927, "grad_norm": 0.41616347432136536, "learning_rate": 4.8542878245638795e-05, "loss": 2.7735, "step": 6512 }, { "epoch": 0.5340712272610812, "grad_norm": 0.4180244207382202, "learning_rate": 4.8515776896324784e-05, "loss": 2.7492, "step": 6514 }, { "epoch": 0.5342352036894696, "grad_norm": 0.4002653658390045, "learning_rate": 4.848867598344311e-05, "loss": 2.737, "step": 6516 }, { "epoch": 0.534399180117858, "grad_norm": 0.3756474554538727, "learning_rate": 4.846157551496271e-05, "loss": 2.765, "step": 6518 }, { "epoch": 0.5345631565462465, "grad_norm": 0.3449940085411072, "learning_rate": 4.8434475498852394e-05, "loss": 2.7273, "step": 6520 }, { "epoch": 0.5347271329746349, "grad_norm": 0.3994511365890503, "learning_rate": 4.84073759430809e-05, "loss": 2.7916, "step": 6522 }, { "epoch": 0.5348911094030233, "grad_norm": 0.4182688295841217, "learning_rate": 4.8380276855616755e-05, "loss": 2.7951, "step": 6524 }, { "epoch": 0.5350550858314117, "grad_norm": 0.40519729256629944, "learning_rate": 4.8353178244428395e-05, "loss": 2.7612, "step": 6526 }, { "epoch": 0.5352190622598002, "grad_norm": 0.3876676857471466, "learning_rate": 4.832608011748411e-05, "loss": 2.717, "step": 6528 }, { "epoch": 0.5353830386881886, "grad_norm": 0.3639967143535614, "learning_rate": 4.829898248275203e-05, "loss": 2.7299, "step": 6530 }, { "epoch": 0.535547015116577, "grad_norm": 0.3999374508857727, "learning_rate": 4.827188534820013e-05, "loss": 2.7423, "step": 6532 }, { "epoch": 0.5357109915449654, "grad_norm": 0.3990139365196228, "learning_rate": 4.824478872179631e-05, "loss": 2.7551, "step": 6534 }, { "epoch": 0.5358749679733539, "grad_norm": 0.38419950008392334, "learning_rate": 4.821769261150823e-05, "loss": 2.7131, "step": 6536 }, { "epoch": 0.5360389444017423, "grad_norm": 0.3976088762283325, "learning_rate": 4.819059702530343e-05, "loss": 2.766, "step": 6538 }, { "epoch": 0.5362029208301307, "grad_norm": 0.45600828528404236, "learning_rate": 4.816350197114935e-05, "loss": 2.7843, "step": 6540 }, { "epoch": 0.536366897258519, "grad_norm": 0.4235410988330841, "learning_rate": 4.813640745701319e-05, "loss": 2.7852, "step": 6542 }, { "epoch": 0.5365308736869076, "grad_norm": 0.473484069108963, "learning_rate": 4.810931349086202e-05, "loss": 2.7651, "step": 6544 }, { "epoch": 0.5366948501152959, "grad_norm": 0.4139662981033325, "learning_rate": 4.80822200806628e-05, "loss": 2.7612, "step": 6546 }, { "epoch": 0.5368588265436843, "grad_norm": 0.3821699917316437, "learning_rate": 4.8055127234382243e-05, "loss": 2.7275, "step": 6548 }, { "epoch": 0.5370228029720727, "grad_norm": 0.40968820452690125, "learning_rate": 4.8028034959986954e-05, "loss": 2.826, "step": 6550 }, { "epoch": 0.5371867794004612, "grad_norm": 0.41920238733291626, "learning_rate": 4.8000943265443355e-05, "loss": 2.7462, "step": 6552 }, { "epoch": 0.5373507558288496, "grad_norm": 0.41442564129829407, "learning_rate": 4.797385215871769e-05, "loss": 2.7687, "step": 6554 }, { "epoch": 0.537514732257238, "grad_norm": 0.40692684054374695, "learning_rate": 4.7946761647776015e-05, "loss": 2.7562, "step": 6556 }, { "epoch": 0.5376787086856264, "grad_norm": 0.41064393520355225, "learning_rate": 4.791967174058428e-05, "loss": 2.7402, "step": 6558 }, { "epoch": 0.5378426851140149, "grad_norm": 0.3895728290081024, "learning_rate": 4.789258244510817e-05, "loss": 2.7531, "step": 6560 }, { "epoch": 0.5380066615424033, "grad_norm": 0.3740815222263336, "learning_rate": 4.786549376931322e-05, "loss": 2.7991, "step": 6562 }, { "epoch": 0.5381706379707917, "grad_norm": 0.39689722657203674, "learning_rate": 4.7838405721164833e-05, "loss": 2.743, "step": 6564 }, { "epoch": 0.5383346143991801, "grad_norm": 0.38677778840065, "learning_rate": 4.7811318308628154e-05, "loss": 2.7792, "step": 6566 }, { "epoch": 0.5384985908275686, "grad_norm": 0.3950054943561554, "learning_rate": 4.7784231539668164e-05, "loss": 2.7371, "step": 6568 }, { "epoch": 0.538662567255957, "grad_norm": 0.4083583950996399, "learning_rate": 4.775714542224971e-05, "loss": 2.7824, "step": 6570 }, { "epoch": 0.5388265436843453, "grad_norm": 0.3985320031642914, "learning_rate": 4.773005996433737e-05, "loss": 2.7895, "step": 6572 }, { "epoch": 0.5389905201127337, "grad_norm": 0.37357017397880554, "learning_rate": 4.7702975173895544e-05, "loss": 2.7912, "step": 6574 }, { "epoch": 0.5391544965411222, "grad_norm": 0.40611863136291504, "learning_rate": 4.767589105888849e-05, "loss": 2.6963, "step": 6576 }, { "epoch": 0.5393184729695106, "grad_norm": 0.39386776089668274, "learning_rate": 4.7648807627280206e-05, "loss": 2.7935, "step": 6578 }, { "epoch": 0.539482449397899, "grad_norm": 0.4086373448371887, "learning_rate": 4.762172488703449e-05, "loss": 2.786, "step": 6580 }, { "epoch": 0.5396464258262875, "grad_norm": 0.40746963024139404, "learning_rate": 4.7594642846114995e-05, "loss": 2.7644, "step": 6582 }, { "epoch": 0.5398104022546759, "grad_norm": 0.38858145475387573, "learning_rate": 4.756756151248512e-05, "loss": 2.7503, "step": 6584 }, { "epoch": 0.5399743786830643, "grad_norm": 0.38517722487449646, "learning_rate": 4.754048089410805e-05, "loss": 2.8053, "step": 6586 }, { "epoch": 0.5401383551114527, "grad_norm": 0.3999423086643219, "learning_rate": 4.7513400998946814e-05, "loss": 2.7599, "step": 6588 }, { "epoch": 0.5403023315398412, "grad_norm": 0.4180408716201782, "learning_rate": 4.748632183496416e-05, "loss": 2.779, "step": 6590 }, { "epoch": 0.5404663079682296, "grad_norm": 0.4267200231552124, "learning_rate": 4.745924341012266e-05, "loss": 2.7721, "step": 6592 }, { "epoch": 0.540630284396618, "grad_norm": 0.42305755615234375, "learning_rate": 4.743216573238466e-05, "loss": 2.7375, "step": 6594 }, { "epoch": 0.5407942608250064, "grad_norm": 0.41102567315101624, "learning_rate": 4.7405088809712294e-05, "loss": 2.7693, "step": 6596 }, { "epoch": 0.5409582372533949, "grad_norm": 0.40447738766670227, "learning_rate": 4.737801265006746e-05, "loss": 2.7783, "step": 6598 }, { "epoch": 0.5411222136817833, "grad_norm": 0.37356311082839966, "learning_rate": 4.735093726141185e-05, "loss": 2.7405, "step": 6600 }, { "epoch": 0.5412861901101716, "grad_norm": 0.3971530795097351, "learning_rate": 4.7323862651706925e-05, "loss": 2.7306, "step": 6602 }, { "epoch": 0.54145016653856, "grad_norm": 0.4356495141983032, "learning_rate": 4.729678882891386e-05, "loss": 2.8043, "step": 6604 }, { "epoch": 0.5416141429669485, "grad_norm": 0.41701847314834595, "learning_rate": 4.726971580099372e-05, "loss": 2.7598, "step": 6606 }, { "epoch": 0.5417781193953369, "grad_norm": 0.4182874262332916, "learning_rate": 4.724264357590722e-05, "loss": 2.7265, "step": 6608 }, { "epoch": 0.5419420958237253, "grad_norm": 0.3866254687309265, "learning_rate": 4.721557216161488e-05, "loss": 2.7641, "step": 6610 }, { "epoch": 0.5421060722521137, "grad_norm": 0.3859403431415558, "learning_rate": 4.718850156607702e-05, "loss": 2.7343, "step": 6612 }, { "epoch": 0.5422700486805022, "grad_norm": 0.3834199905395508, "learning_rate": 4.716143179725367e-05, "loss": 2.7267, "step": 6614 }, { "epoch": 0.5424340251088906, "grad_norm": 0.3715173006057739, "learning_rate": 4.713436286310461e-05, "loss": 2.8098, "step": 6616 }, { "epoch": 0.542598001537279, "grad_norm": 0.3994891047477722, "learning_rate": 4.7107294771589416e-05, "loss": 2.686, "step": 6618 }, { "epoch": 0.5427619779656674, "grad_norm": 0.4067355692386627, "learning_rate": 4.7080227530667406e-05, "loss": 2.7673, "step": 6620 }, { "epoch": 0.5429259543940559, "grad_norm": 0.45450058579444885, "learning_rate": 4.7053161148297626e-05, "loss": 2.7531, "step": 6622 }, { "epoch": 0.5430899308224443, "grad_norm": 0.4142906963825226, "learning_rate": 4.7026095632438885e-05, "loss": 2.7542, "step": 6624 }, { "epoch": 0.5432539072508327, "grad_norm": 0.43223828077316284, "learning_rate": 4.6999030991049735e-05, "loss": 2.7488, "step": 6626 }, { "epoch": 0.5434178836792211, "grad_norm": 0.4043966233730316, "learning_rate": 4.697196723208845e-05, "loss": 2.7523, "step": 6628 }, { "epoch": 0.5435818601076096, "grad_norm": 0.40228337049484253, "learning_rate": 4.6944904363513096e-05, "loss": 2.7256, "step": 6630 }, { "epoch": 0.543745836535998, "grad_norm": 0.4250689446926117, "learning_rate": 4.6917842393281444e-05, "loss": 2.8177, "step": 6632 }, { "epoch": 0.5439098129643863, "grad_norm": 0.432136207818985, "learning_rate": 4.6890781329350963e-05, "loss": 2.741, "step": 6634 }, { "epoch": 0.5440737893927748, "grad_norm": 0.4338907301425934, "learning_rate": 4.686372117967895e-05, "loss": 2.7243, "step": 6636 }, { "epoch": 0.5442377658211632, "grad_norm": 0.4134625494480133, "learning_rate": 4.683666195222235e-05, "loss": 2.7458, "step": 6638 }, { "epoch": 0.5444017422495516, "grad_norm": 0.39210569858551025, "learning_rate": 4.680960365493785e-05, "loss": 2.7762, "step": 6640 }, { "epoch": 0.54456571867794, "grad_norm": 0.4027068614959717, "learning_rate": 4.6782546295781914e-05, "loss": 2.7606, "step": 6642 }, { "epoch": 0.5447296951063285, "grad_norm": 0.40476635098457336, "learning_rate": 4.6755489882710666e-05, "loss": 2.7356, "step": 6644 }, { "epoch": 0.5448936715347169, "grad_norm": 0.438758909702301, "learning_rate": 4.672843442367999e-05, "loss": 2.7849, "step": 6646 }, { "epoch": 0.5450576479631053, "grad_norm": 0.3990931212902069, "learning_rate": 4.6701379926645495e-05, "loss": 2.7732, "step": 6648 }, { "epoch": 0.5452216243914937, "grad_norm": 0.457121878862381, "learning_rate": 4.6674326399562476e-05, "loss": 2.8191, "step": 6650 }, { "epoch": 0.5453856008198822, "grad_norm": 0.43321701884269714, "learning_rate": 4.664727385038594e-05, "loss": 2.7818, "step": 6652 }, { "epoch": 0.5455495772482706, "grad_norm": 0.43147024512290955, "learning_rate": 4.662022228707067e-05, "loss": 2.7077, "step": 6654 }, { "epoch": 0.545713553676659, "grad_norm": 0.45424506068229675, "learning_rate": 4.65931717175711e-05, "loss": 2.7434, "step": 6656 }, { "epoch": 0.5458775301050474, "grad_norm": 0.4286510646343231, "learning_rate": 4.656612214984135e-05, "loss": 2.7395, "step": 6658 }, { "epoch": 0.5460415065334359, "grad_norm": 0.38945695757865906, "learning_rate": 4.653907359183533e-05, "loss": 2.7433, "step": 6660 }, { "epoch": 0.5462054829618243, "grad_norm": 0.418412983417511, "learning_rate": 4.651202605150658e-05, "loss": 2.7534, "step": 6662 }, { "epoch": 0.5463694593902126, "grad_norm": 0.4252218008041382, "learning_rate": 4.648497953680838e-05, "loss": 2.7365, "step": 6664 }, { "epoch": 0.546533435818601, "grad_norm": 0.4022780656814575, "learning_rate": 4.6457934055693684e-05, "loss": 2.7434, "step": 6666 }, { "epoch": 0.5466974122469895, "grad_norm": 0.4030002951622009, "learning_rate": 4.643088961611513e-05, "loss": 2.7049, "step": 6668 }, { "epoch": 0.5468613886753779, "grad_norm": 0.401536226272583, "learning_rate": 4.640384622602512e-05, "loss": 2.7702, "step": 6670 }, { "epoch": 0.5470253651037663, "grad_norm": 0.4336666762828827, "learning_rate": 4.637680389337567e-05, "loss": 2.7814, "step": 6672 }, { "epoch": 0.5471893415321547, "grad_norm": 0.4129686951637268, "learning_rate": 4.6349762626118505e-05, "loss": 2.7637, "step": 6674 }, { "epoch": 0.5473533179605432, "grad_norm": 0.41660234332084656, "learning_rate": 4.6322722432205076e-05, "loss": 2.731, "step": 6676 }, { "epoch": 0.5475172943889316, "grad_norm": 0.40781137347221375, "learning_rate": 4.629568331958647e-05, "loss": 2.8013, "step": 6678 }, { "epoch": 0.54768127081732, "grad_norm": 0.38069507479667664, "learning_rate": 4.626864529621346e-05, "loss": 2.7777, "step": 6680 }, { "epoch": 0.5478452472457084, "grad_norm": 0.37233811616897583, "learning_rate": 4.624160837003655e-05, "loss": 2.7827, "step": 6682 }, { "epoch": 0.5480092236740969, "grad_norm": 0.4252106845378876, "learning_rate": 4.621457254900586e-05, "loss": 2.7698, "step": 6684 }, { "epoch": 0.5481732001024853, "grad_norm": 0.42072904109954834, "learning_rate": 4.618753784107121e-05, "loss": 2.8281, "step": 6686 }, { "epoch": 0.5483371765308737, "grad_norm": 0.39454519748687744, "learning_rate": 4.61605042541821e-05, "loss": 2.739, "step": 6688 }, { "epoch": 0.5485011529592622, "grad_norm": 0.40513092279434204, "learning_rate": 4.61334717962877e-05, "loss": 2.7624, "step": 6690 }, { "epoch": 0.5486651293876506, "grad_norm": 0.4346267580986023, "learning_rate": 4.6106440475336796e-05, "loss": 2.7803, "step": 6692 }, { "epoch": 0.5488291058160389, "grad_norm": 0.42634353041648865, "learning_rate": 4.6079410299277955e-05, "loss": 2.7594, "step": 6694 }, { "epoch": 0.5489930822444273, "grad_norm": 0.3908812701702118, "learning_rate": 4.605238127605929e-05, "loss": 2.7085, "step": 6696 }, { "epoch": 0.5491570586728158, "grad_norm": 0.42012181878089905, "learning_rate": 4.602535341362861e-05, "loss": 2.7227, "step": 6698 }, { "epoch": 0.5493210351012042, "grad_norm": 0.42639926075935364, "learning_rate": 4.599832671993344e-05, "loss": 2.7423, "step": 6700 }, { "epoch": 0.5494850115295926, "grad_norm": 0.3801124393939972, "learning_rate": 4.597130120292089e-05, "loss": 2.7533, "step": 6702 }, { "epoch": 0.549648987957981, "grad_norm": 0.3986124098300934, "learning_rate": 4.5944276870537747e-05, "loss": 2.7356, "step": 6704 }, { "epoch": 0.5498129643863695, "grad_norm": 0.40717771649360657, "learning_rate": 4.5917253730730444e-05, "loss": 2.7659, "step": 6706 }, { "epoch": 0.5499769408147579, "grad_norm": 0.3899606764316559, "learning_rate": 4.589023179144511e-05, "loss": 2.7652, "step": 6708 }, { "epoch": 0.5501409172431463, "grad_norm": 0.39462822675704956, "learning_rate": 4.586321106062744e-05, "loss": 2.7228, "step": 6710 }, { "epoch": 0.5503048936715347, "grad_norm": 0.4435010254383087, "learning_rate": 4.583619154622285e-05, "loss": 2.7398, "step": 6712 }, { "epoch": 0.5504688700999232, "grad_norm": 0.41980791091918945, "learning_rate": 4.580917325617634e-05, "loss": 2.7475, "step": 6714 }, { "epoch": 0.5506328465283116, "grad_norm": 0.41926372051239014, "learning_rate": 4.578215619843257e-05, "loss": 2.7747, "step": 6716 }, { "epoch": 0.5507968229567, "grad_norm": 0.40478911995887756, "learning_rate": 4.575514038093587e-05, "loss": 2.7238, "step": 6718 }, { "epoch": 0.5509607993850884, "grad_norm": 0.4250586926937103, "learning_rate": 4.572812581163017e-05, "loss": 2.7914, "step": 6720 }, { "epoch": 0.5511247758134769, "grad_norm": 0.3964104950428009, "learning_rate": 4.570111249845901e-05, "loss": 2.7644, "step": 6722 }, { "epoch": 0.5512887522418652, "grad_norm": 0.4339950978755951, "learning_rate": 4.567410044936564e-05, "loss": 2.72, "step": 6724 }, { "epoch": 0.5514527286702536, "grad_norm": 0.4455949366092682, "learning_rate": 4.564708967229286e-05, "loss": 2.7627, "step": 6726 }, { "epoch": 0.551616705098642, "grad_norm": 0.42049476504325867, "learning_rate": 4.5620080175183116e-05, "loss": 2.7423, "step": 6728 }, { "epoch": 0.5517806815270305, "grad_norm": 0.4246431887149811, "learning_rate": 4.559307196597852e-05, "loss": 2.7474, "step": 6730 }, { "epoch": 0.5519446579554189, "grad_norm": 0.45321446657180786, "learning_rate": 4.556606505262074e-05, "loss": 2.7179, "step": 6732 }, { "epoch": 0.5521086343838073, "grad_norm": 0.39583444595336914, "learning_rate": 4.553905944305111e-05, "loss": 2.7737, "step": 6734 }, { "epoch": 0.5522726108121957, "grad_norm": 0.39999717473983765, "learning_rate": 4.5512055145210576e-05, "loss": 2.7749, "step": 6736 }, { "epoch": 0.5524365872405842, "grad_norm": 0.3975955545902252, "learning_rate": 4.548505216703968e-05, "loss": 2.7756, "step": 6738 }, { "epoch": 0.5526005636689726, "grad_norm": 0.3958473205566406, "learning_rate": 4.545805051647855e-05, "loss": 2.752, "step": 6740 }, { "epoch": 0.552764540097361, "grad_norm": 0.37771138548851013, "learning_rate": 4.543105020146702e-05, "loss": 2.8027, "step": 6742 }, { "epoch": 0.5529285165257494, "grad_norm": 0.419717937707901, "learning_rate": 4.540405122994444e-05, "loss": 2.7716, "step": 6744 }, { "epoch": 0.5530924929541379, "grad_norm": 0.4406159818172455, "learning_rate": 4.5377053609849764e-05, "loss": 2.7907, "step": 6746 }, { "epoch": 0.5532564693825263, "grad_norm": 0.38063448667526245, "learning_rate": 4.535005734912163e-05, "loss": 2.7008, "step": 6748 }, { "epoch": 0.5534204458109147, "grad_norm": 0.440857857465744, "learning_rate": 4.532306245569821e-05, "loss": 2.748, "step": 6750 }, { "epoch": 0.5535844222393032, "grad_norm": 0.44015419483184814, "learning_rate": 4.529606893751727e-05, "loss": 2.7657, "step": 6752 }, { "epoch": 0.5537483986676915, "grad_norm": 0.4137510061264038, "learning_rate": 4.526907680251621e-05, "loss": 2.7373, "step": 6754 }, { "epoch": 0.5539123750960799, "grad_norm": 0.42115190625190735, "learning_rate": 4.524208605863198e-05, "loss": 2.7341, "step": 6756 }, { "epoch": 0.5540763515244683, "grad_norm": 0.42313051223754883, "learning_rate": 4.521509671380117e-05, "loss": 2.7291, "step": 6758 }, { "epoch": 0.5542403279528568, "grad_norm": 0.4206005930900574, "learning_rate": 4.518810877595993e-05, "loss": 2.7165, "step": 6760 }, { "epoch": 0.5544043043812452, "grad_norm": 0.41328859329223633, "learning_rate": 4.5161122253043995e-05, "loss": 2.748, "step": 6762 }, { "epoch": 0.5545682808096336, "grad_norm": 0.43345096707344055, "learning_rate": 4.513413715298867e-05, "loss": 2.7707, "step": 6764 }, { "epoch": 0.554732257238022, "grad_norm": 0.4347042143344879, "learning_rate": 4.51071534837289e-05, "loss": 2.7036, "step": 6766 }, { "epoch": 0.5548962336664105, "grad_norm": 0.41023269295692444, "learning_rate": 4.508017125319914e-05, "loss": 2.7155, "step": 6768 }, { "epoch": 0.5550602100947989, "grad_norm": 0.38797706365585327, "learning_rate": 4.505319046933344e-05, "loss": 2.7527, "step": 6770 }, { "epoch": 0.5552241865231873, "grad_norm": 0.40941694378852844, "learning_rate": 4.502621114006548e-05, "loss": 2.7829, "step": 6772 }, { "epoch": 0.5553881629515757, "grad_norm": 0.42043906450271606, "learning_rate": 4.4999233273328436e-05, "loss": 2.7298, "step": 6774 }, { "epoch": 0.5555521393799642, "grad_norm": 0.4022752344608307, "learning_rate": 4.497225687705509e-05, "loss": 2.7405, "step": 6776 }, { "epoch": 0.5557161158083526, "grad_norm": 0.4208052158355713, "learning_rate": 4.49452819591778e-05, "loss": 2.7314, "step": 6778 }, { "epoch": 0.555880092236741, "grad_norm": 0.45849183201789856, "learning_rate": 4.4918308527628466e-05, "loss": 2.7521, "step": 6780 }, { "epoch": 0.5560440686651293, "grad_norm": 0.41802626848220825, "learning_rate": 4.489133659033854e-05, "loss": 2.8101, "step": 6782 }, { "epoch": 0.5562080450935178, "grad_norm": 0.4206705391407013, "learning_rate": 4.4864366155239114e-05, "loss": 2.7493, "step": 6784 }, { "epoch": 0.5563720215219062, "grad_norm": 0.4085046052932739, "learning_rate": 4.483739723026075e-05, "loss": 2.7551, "step": 6786 }, { "epoch": 0.5565359979502946, "grad_norm": 0.44878071546554565, "learning_rate": 4.481042982333356e-05, "loss": 2.7041, "step": 6788 }, { "epoch": 0.556699974378683, "grad_norm": 0.48486924171447754, "learning_rate": 4.478346394238731e-05, "loss": 2.7416, "step": 6790 }, { "epoch": 0.5568639508070715, "grad_norm": 0.44023939967155457, "learning_rate": 4.475649959535123e-05, "loss": 2.755, "step": 6792 }, { "epoch": 0.5570279272354599, "grad_norm": 0.41545045375823975, "learning_rate": 4.472953679015409e-05, "loss": 2.7685, "step": 6794 }, { "epoch": 0.5571919036638483, "grad_norm": 0.4188261926174164, "learning_rate": 4.47025755347243e-05, "loss": 2.7642, "step": 6796 }, { "epoch": 0.5573558800922367, "grad_norm": 0.4364667534828186, "learning_rate": 4.4675615836989716e-05, "loss": 2.841, "step": 6798 }, { "epoch": 0.5575198565206252, "grad_norm": 0.4194522202014923, "learning_rate": 4.464865770487777e-05, "loss": 2.7053, "step": 6800 }, { "epoch": 0.5576838329490136, "grad_norm": 0.4182877242565155, "learning_rate": 4.462170114631546e-05, "loss": 2.7184, "step": 6802 }, { "epoch": 0.557847809377402, "grad_norm": 0.40800806879997253, "learning_rate": 4.459474616922928e-05, "loss": 2.7706, "step": 6804 }, { "epoch": 0.5580117858057905, "grad_norm": 0.39892709255218506, "learning_rate": 4.456779278154527e-05, "loss": 2.7453, "step": 6806 }, { "epoch": 0.5581757622341789, "grad_norm": 0.3947122097015381, "learning_rate": 4.454084099118904e-05, "loss": 2.7564, "step": 6808 }, { "epoch": 0.5583397386625673, "grad_norm": 0.3964519798755646, "learning_rate": 4.451389080608569e-05, "loss": 2.7603, "step": 6810 }, { "epoch": 0.5585037150909556, "grad_norm": 0.4413125514984131, "learning_rate": 4.448694223415983e-05, "loss": 2.7284, "step": 6812 }, { "epoch": 0.5586676915193441, "grad_norm": 0.40390545129776, "learning_rate": 4.445999528333567e-05, "loss": 2.6989, "step": 6814 }, { "epoch": 0.5588316679477325, "grad_norm": 0.41920092701911926, "learning_rate": 4.4433049961536874e-05, "loss": 2.718, "step": 6816 }, { "epoch": 0.5589956443761209, "grad_norm": 0.4399784207344055, "learning_rate": 4.4406106276686624e-05, "loss": 2.814, "step": 6818 }, { "epoch": 0.5591596208045093, "grad_norm": 0.45444971323013306, "learning_rate": 4.4379164236707706e-05, "loss": 2.7831, "step": 6820 }, { "epoch": 0.5593235972328978, "grad_norm": 0.42461079359054565, "learning_rate": 4.435222384952233e-05, "loss": 2.7525, "step": 6822 }, { "epoch": 0.5594875736612862, "grad_norm": 0.4275374710559845, "learning_rate": 4.4325285123052243e-05, "loss": 2.7201, "step": 6824 }, { "epoch": 0.5596515500896746, "grad_norm": 0.4361192584037781, "learning_rate": 4.429834806521874e-05, "loss": 2.7672, "step": 6826 }, { "epoch": 0.559815526518063, "grad_norm": 0.38457778096199036, "learning_rate": 4.427141268394258e-05, "loss": 2.7359, "step": 6828 }, { "epoch": 0.5599795029464515, "grad_norm": 0.3938714861869812, "learning_rate": 4.4244478987144034e-05, "loss": 2.7471, "step": 6830 }, { "epoch": 0.5601434793748399, "grad_norm": 0.398783802986145, "learning_rate": 4.421754698274294e-05, "loss": 2.8162, "step": 6832 }, { "epoch": 0.5603074558032283, "grad_norm": 0.4110495448112488, "learning_rate": 4.4190616678658566e-05, "loss": 2.7719, "step": 6834 }, { "epoch": 0.5604714322316167, "grad_norm": 0.43369707465171814, "learning_rate": 4.4163688082809674e-05, "loss": 2.7541, "step": 6836 }, { "epoch": 0.5606354086600052, "grad_norm": 0.4118701219558716, "learning_rate": 4.4136761203114605e-05, "loss": 2.7743, "step": 6838 }, { "epoch": 0.5607993850883936, "grad_norm": 0.38589438796043396, "learning_rate": 4.410983604749113e-05, "loss": 2.7358, "step": 6840 }, { "epoch": 0.5609633615167819, "grad_norm": 0.3737694025039673, "learning_rate": 4.408291262385651e-05, "loss": 2.733, "step": 6842 }, { "epoch": 0.5611273379451703, "grad_norm": 0.4078843593597412, "learning_rate": 4.405599094012752e-05, "loss": 2.75, "step": 6844 }, { "epoch": 0.5612913143735588, "grad_norm": 0.3856929838657379, "learning_rate": 4.402907100422043e-05, "loss": 2.6992, "step": 6846 }, { "epoch": 0.5614552908019472, "grad_norm": 0.4095956087112427, "learning_rate": 4.400215282405097e-05, "loss": 2.7023, "step": 6848 }, { "epoch": 0.5616192672303356, "grad_norm": 0.4438058137893677, "learning_rate": 4.397523640753438e-05, "loss": 2.683, "step": 6850 }, { "epoch": 0.561783243658724, "grad_norm": 0.4461391568183899, "learning_rate": 4.394832176258537e-05, "loss": 2.7345, "step": 6852 }, { "epoch": 0.5619472200871125, "grad_norm": 0.43370214104652405, "learning_rate": 4.392140889711809e-05, "loss": 2.7888, "step": 6854 }, { "epoch": 0.5621111965155009, "grad_norm": 0.4333142340183258, "learning_rate": 4.389449781904627e-05, "loss": 2.7502, "step": 6856 }, { "epoch": 0.5622751729438893, "grad_norm": 0.40552252531051636, "learning_rate": 4.386758853628301e-05, "loss": 2.791, "step": 6858 }, { "epoch": 0.5624391493722777, "grad_norm": 0.41940203309059143, "learning_rate": 4.3840681056740904e-05, "loss": 2.778, "step": 6860 }, { "epoch": 0.5626031258006662, "grad_norm": 0.3927070200443268, "learning_rate": 4.3813775388332076e-05, "loss": 2.7979, "step": 6862 }, { "epoch": 0.5627671022290546, "grad_norm": 0.4172367751598358, "learning_rate": 4.3786871538968054e-05, "loss": 2.8007, "step": 6864 }, { "epoch": 0.562931078657443, "grad_norm": 0.4077468514442444, "learning_rate": 4.375996951655984e-05, "loss": 2.7384, "step": 6866 }, { "epoch": 0.5630950550858315, "grad_norm": 0.40657129883766174, "learning_rate": 4.373306932901794e-05, "loss": 2.7395, "step": 6868 }, { "epoch": 0.5632590315142199, "grad_norm": 0.37659966945648193, "learning_rate": 4.370617098425226e-05, "loss": 2.7772, "step": 6870 }, { "epoch": 0.5634230079426082, "grad_norm": 0.38550299406051636, "learning_rate": 4.367927449017221e-05, "loss": 2.7439, "step": 6872 }, { "epoch": 0.5635869843709966, "grad_norm": 0.3852583169937134, "learning_rate": 4.365237985468664e-05, "loss": 2.7548, "step": 6874 }, { "epoch": 0.5637509607993851, "grad_norm": 0.40112239122390747, "learning_rate": 4.362548708570386e-05, "loss": 2.7937, "step": 6876 }, { "epoch": 0.5639149372277735, "grad_norm": 0.3871251344680786, "learning_rate": 4.35985961911316e-05, "loss": 2.7476, "step": 6878 }, { "epoch": 0.5640789136561619, "grad_norm": 0.39240968227386475, "learning_rate": 4.3571707178877096e-05, "loss": 2.7073, "step": 6880 }, { "epoch": 0.5642428900845503, "grad_norm": 0.3936747610569, "learning_rate": 4.3544820056846995e-05, "loss": 2.7616, "step": 6882 }, { "epoch": 0.5644068665129388, "grad_norm": 0.3985251784324646, "learning_rate": 4.351793483294736e-05, "loss": 2.753, "step": 6884 }, { "epoch": 0.5645708429413272, "grad_norm": 0.4452661871910095, "learning_rate": 4.349105151508377e-05, "loss": 2.7652, "step": 6886 }, { "epoch": 0.5647348193697156, "grad_norm": 0.431638240814209, "learning_rate": 4.346417011116118e-05, "loss": 2.8226, "step": 6888 }, { "epoch": 0.564898795798104, "grad_norm": 0.4476519525051117, "learning_rate": 4.3437290629084e-05, "loss": 2.7056, "step": 6890 }, { "epoch": 0.5650627722264925, "grad_norm": 0.39053019881248474, "learning_rate": 4.34104130767561e-05, "loss": 2.7887, "step": 6892 }, { "epoch": 0.5652267486548809, "grad_norm": 0.3967573046684265, "learning_rate": 4.338353746208073e-05, "loss": 2.7063, "step": 6894 }, { "epoch": 0.5653907250832693, "grad_norm": 0.3882686197757721, "learning_rate": 4.335666379296062e-05, "loss": 2.7154, "step": 6896 }, { "epoch": 0.5655547015116577, "grad_norm": 0.4172016680240631, "learning_rate": 4.3329792077297914e-05, "loss": 2.7593, "step": 6898 }, { "epoch": 0.5657186779400462, "grad_norm": 0.4219083786010742, "learning_rate": 4.330292232299417e-05, "loss": 2.7654, "step": 6900 }, { "epoch": 0.5658826543684345, "grad_norm": 0.3960023820400238, "learning_rate": 4.327605453795036e-05, "loss": 2.7306, "step": 6902 }, { "epoch": 0.5660466307968229, "grad_norm": 0.40032973885536194, "learning_rate": 4.324918873006692e-05, "loss": 2.7474, "step": 6904 }, { "epoch": 0.5662106072252113, "grad_norm": 0.36831405758857727, "learning_rate": 4.322232490724367e-05, "loss": 2.7406, "step": 6906 }, { "epoch": 0.5663745836535998, "grad_norm": 0.395846962928772, "learning_rate": 4.319546307737983e-05, "loss": 2.7481, "step": 6908 }, { "epoch": 0.5665385600819882, "grad_norm": 0.4093070328235626, "learning_rate": 4.3168603248374096e-05, "loss": 2.6996, "step": 6910 }, { "epoch": 0.5667025365103766, "grad_norm": 0.4207175374031067, "learning_rate": 4.314174542812452e-05, "loss": 2.7349, "step": 6912 }, { "epoch": 0.566866512938765, "grad_norm": 0.3914620578289032, "learning_rate": 4.311488962452857e-05, "loss": 2.7575, "step": 6914 }, { "epoch": 0.5670304893671535, "grad_norm": 0.3748440146446228, "learning_rate": 4.3088035845483155e-05, "loss": 2.7483, "step": 6916 }, { "epoch": 0.5671944657955419, "grad_norm": 0.3811940550804138, "learning_rate": 4.306118409888455e-05, "loss": 2.7513, "step": 6918 }, { "epoch": 0.5673584422239303, "grad_norm": 0.3829500675201416, "learning_rate": 4.303433439262843e-05, "loss": 2.7502, "step": 6920 }, { "epoch": 0.5675224186523188, "grad_norm": 0.4123048782348633, "learning_rate": 4.300748673460993e-05, "loss": 2.754, "step": 6922 }, { "epoch": 0.5676863950807072, "grad_norm": 0.38617756962776184, "learning_rate": 4.298064113272353e-05, "loss": 2.7199, "step": 6924 }, { "epoch": 0.5678503715090956, "grad_norm": 0.37602609395980835, "learning_rate": 4.2953797594863074e-05, "loss": 2.727, "step": 6926 }, { "epoch": 0.568014347937484, "grad_norm": 0.40039005875587463, "learning_rate": 4.2926956128921895e-05, "loss": 2.8184, "step": 6928 }, { "epoch": 0.5681783243658725, "grad_norm": 0.3854617178440094, "learning_rate": 4.290011674279264e-05, "loss": 2.77, "step": 6930 }, { "epoch": 0.5683423007942608, "grad_norm": 0.3800494968891144, "learning_rate": 4.2873279444367346e-05, "loss": 2.7059, "step": 6932 }, { "epoch": 0.5685062772226492, "grad_norm": 0.3799922466278076, "learning_rate": 4.28464442415375e-05, "loss": 2.7154, "step": 6934 }, { "epoch": 0.5686702536510376, "grad_norm": 0.39791008830070496, "learning_rate": 4.281961114219392e-05, "loss": 2.7433, "step": 6936 }, { "epoch": 0.5688342300794261, "grad_norm": 0.3874998092651367, "learning_rate": 4.279278015422678e-05, "loss": 2.7691, "step": 6938 }, { "epoch": 0.5689982065078145, "grad_norm": 0.40580660104751587, "learning_rate": 4.276595128552572e-05, "loss": 2.7029, "step": 6940 }, { "epoch": 0.5691621829362029, "grad_norm": 0.4420451521873474, "learning_rate": 4.273912454397968e-05, "loss": 2.7574, "step": 6942 }, { "epoch": 0.5693261593645913, "grad_norm": 0.42256930470466614, "learning_rate": 4.2712299937476976e-05, "loss": 2.7133, "step": 6944 }, { "epoch": 0.5694901357929798, "grad_norm": 0.414419949054718, "learning_rate": 4.2685477473905366e-05, "loss": 2.7022, "step": 6946 }, { "epoch": 0.5696541122213682, "grad_norm": 0.40276476740837097, "learning_rate": 4.265865716115191e-05, "loss": 2.7375, "step": 6948 }, { "epoch": 0.5698180886497566, "grad_norm": 0.4238226115703583, "learning_rate": 4.263183900710304e-05, "loss": 2.7346, "step": 6950 }, { "epoch": 0.569982065078145, "grad_norm": 0.41304221749305725, "learning_rate": 4.260502301964462e-05, "loss": 2.7534, "step": 6952 }, { "epoch": 0.5701460415065335, "grad_norm": 0.3909514546394348, "learning_rate": 4.257820920666179e-05, "loss": 2.7477, "step": 6954 }, { "epoch": 0.5703100179349219, "grad_norm": 0.3825553357601166, "learning_rate": 4.255139757603907e-05, "loss": 2.7187, "step": 6956 }, { "epoch": 0.5704739943633103, "grad_norm": 0.40624940395355225, "learning_rate": 4.252458813566042e-05, "loss": 2.7643, "step": 6958 }, { "epoch": 0.5706379707916986, "grad_norm": 0.4360128939151764, "learning_rate": 4.249778089340905e-05, "loss": 2.8152, "step": 6960 }, { "epoch": 0.5708019472200871, "grad_norm": 0.3969877362251282, "learning_rate": 4.2470975857167566e-05, "loss": 2.7356, "step": 6962 }, { "epoch": 0.5709659236484755, "grad_norm": 0.3684930205345154, "learning_rate": 4.244417303481795e-05, "loss": 2.7356, "step": 6964 }, { "epoch": 0.5711299000768639, "grad_norm": 0.3783034682273865, "learning_rate": 4.2417372434241484e-05, "loss": 2.7531, "step": 6966 }, { "epoch": 0.5712938765052523, "grad_norm": 0.37606626749038696, "learning_rate": 4.239057406331881e-05, "loss": 2.75, "step": 6968 }, { "epoch": 0.5714578529336408, "grad_norm": 0.41963014006614685, "learning_rate": 4.236377792992997e-05, "loss": 2.7568, "step": 6970 }, { "epoch": 0.5716218293620292, "grad_norm": 0.4141043722629547, "learning_rate": 4.233698404195429e-05, "loss": 2.7553, "step": 6972 }, { "epoch": 0.5717858057904176, "grad_norm": 0.4187765419483185, "learning_rate": 4.231019240727041e-05, "loss": 2.7638, "step": 6974 }, { "epoch": 0.5719497822188061, "grad_norm": 0.3854373097419739, "learning_rate": 4.2283403033756395e-05, "loss": 2.7458, "step": 6976 }, { "epoch": 0.5721137586471945, "grad_norm": 0.3839758038520813, "learning_rate": 4.225661592928958e-05, "loss": 2.718, "step": 6978 }, { "epoch": 0.5722777350755829, "grad_norm": 0.3789431154727936, "learning_rate": 4.222983110174664e-05, "loss": 2.7418, "step": 6980 }, { "epoch": 0.5724417115039713, "grad_norm": 0.3935195207595825, "learning_rate": 4.220304855900361e-05, "loss": 2.7965, "step": 6982 }, { "epoch": 0.5726056879323598, "grad_norm": 0.38839125633239746, "learning_rate": 4.217626830893583e-05, "loss": 2.8064, "step": 6984 }, { "epoch": 0.5727696643607482, "grad_norm": 0.3698122203350067, "learning_rate": 4.2149490359417946e-05, "loss": 2.7358, "step": 6986 }, { "epoch": 0.5729336407891366, "grad_norm": 0.370185911655426, "learning_rate": 4.2122714718323994e-05, "loss": 2.7785, "step": 6988 }, { "epoch": 0.573097617217525, "grad_norm": 0.38085514307022095, "learning_rate": 4.209594139352725e-05, "loss": 2.7234, "step": 6990 }, { "epoch": 0.5732615936459134, "grad_norm": 0.3918403089046478, "learning_rate": 4.206917039290035e-05, "loss": 2.7152, "step": 6992 }, { "epoch": 0.5734255700743018, "grad_norm": 0.4009610712528229, "learning_rate": 4.2042401724315266e-05, "loss": 2.7066, "step": 6994 }, { "epoch": 0.5735895465026902, "grad_norm": 0.3863372802734375, "learning_rate": 4.2015635395643256e-05, "loss": 2.7088, "step": 6996 }, { "epoch": 0.5737535229310786, "grad_norm": 0.4005741775035858, "learning_rate": 4.198887141475487e-05, "loss": 2.7371, "step": 6998 }, { "epoch": 0.5739174993594671, "grad_norm": 0.3993152678012848, "learning_rate": 4.1962109789520034e-05, "loss": 2.774, "step": 7000 }, { "epoch": 0.5740814757878555, "grad_norm": 0.4045857787132263, "learning_rate": 4.1935350527807915e-05, "loss": 2.752, "step": 7002 }, { "epoch": 0.5742454522162439, "grad_norm": 0.37730270624160767, "learning_rate": 4.190859363748701e-05, "loss": 2.7201, "step": 7004 }, { "epoch": 0.5744094286446323, "grad_norm": 0.38126033544540405, "learning_rate": 4.1881839126425145e-05, "loss": 2.7285, "step": 7006 }, { "epoch": 0.5745734050730208, "grad_norm": 0.42331182956695557, "learning_rate": 4.1855087002489385e-05, "loss": 2.7279, "step": 7008 }, { "epoch": 0.5747373815014092, "grad_norm": 0.4086098372936249, "learning_rate": 4.182833727354615e-05, "loss": 2.7349, "step": 7010 }, { "epoch": 0.5749013579297976, "grad_norm": 0.40086227655410767, "learning_rate": 4.180158994746114e-05, "loss": 2.7495, "step": 7012 }, { "epoch": 0.575065334358186, "grad_norm": 0.450406938791275, "learning_rate": 4.177484503209934e-05, "loss": 2.7815, "step": 7014 }, { "epoch": 0.5752293107865745, "grad_norm": 0.42005404829978943, "learning_rate": 4.1748102535325e-05, "loss": 2.7631, "step": 7016 }, { "epoch": 0.5753932872149629, "grad_norm": 0.38951101899147034, "learning_rate": 4.1721362465001734e-05, "loss": 2.7027, "step": 7018 }, { "epoch": 0.5755572636433512, "grad_norm": 0.4067320227622986, "learning_rate": 4.1694624828992377e-05, "loss": 2.7211, "step": 7020 }, { "epoch": 0.5757212400717396, "grad_norm": 0.4426279366016388, "learning_rate": 4.1667889635159044e-05, "loss": 2.8495, "step": 7022 }, { "epoch": 0.5758852165001281, "grad_norm": 0.4493538737297058, "learning_rate": 4.1641156891363206e-05, "loss": 2.7582, "step": 7024 }, { "epoch": 0.5760491929285165, "grad_norm": 0.46079501509666443, "learning_rate": 4.161442660546553e-05, "loss": 2.7274, "step": 7026 }, { "epoch": 0.5762131693569049, "grad_norm": 0.49728041887283325, "learning_rate": 4.158769878532599e-05, "loss": 2.7489, "step": 7028 }, { "epoch": 0.5763771457852933, "grad_norm": 0.4794892370700836, "learning_rate": 4.156097343880386e-05, "loss": 2.7606, "step": 7030 }, { "epoch": 0.5765411222136818, "grad_norm": 0.4507454037666321, "learning_rate": 4.153425057375766e-05, "loss": 2.7434, "step": 7032 }, { "epoch": 0.5767050986420702, "grad_norm": 0.4382268190383911, "learning_rate": 4.150753019804515e-05, "loss": 2.7202, "step": 7034 }, { "epoch": 0.5768690750704586, "grad_norm": 0.43126344680786133, "learning_rate": 4.148081231952346e-05, "loss": 2.8246, "step": 7036 }, { "epoch": 0.5770330514988471, "grad_norm": 0.4064197242259979, "learning_rate": 4.145409694604887e-05, "loss": 2.7816, "step": 7038 }, { "epoch": 0.5771970279272355, "grad_norm": 0.43481525778770447, "learning_rate": 4.142738408547696e-05, "loss": 2.7149, "step": 7040 }, { "epoch": 0.5773610043556239, "grad_norm": 0.4244084656238556, "learning_rate": 4.1400673745662644e-05, "loss": 2.7205, "step": 7042 }, { "epoch": 0.5775249807840123, "grad_norm": 0.42674392461776733, "learning_rate": 4.137396593445999e-05, "loss": 2.751, "step": 7044 }, { "epoch": 0.5776889572124008, "grad_norm": 0.40948551893234253, "learning_rate": 4.1347260659722365e-05, "loss": 2.7478, "step": 7046 }, { "epoch": 0.5778529336407892, "grad_norm": 0.3910583257675171, "learning_rate": 4.132055792930242e-05, "loss": 2.7325, "step": 7048 }, { "epoch": 0.5780169100691775, "grad_norm": 0.397865355014801, "learning_rate": 4.129385775105201e-05, "loss": 2.7733, "step": 7050 }, { "epoch": 0.5781808864975659, "grad_norm": 0.4031313359737396, "learning_rate": 4.126716013282226e-05, "loss": 2.6976, "step": 7052 }, { "epoch": 0.5783448629259544, "grad_norm": 0.41783228516578674, "learning_rate": 4.124046508246356e-05, "loss": 2.7627, "step": 7054 }, { "epoch": 0.5785088393543428, "grad_norm": 0.4214140474796295, "learning_rate": 4.121377260782551e-05, "loss": 2.7367, "step": 7056 }, { "epoch": 0.5786728157827312, "grad_norm": 0.40227892994880676, "learning_rate": 4.118708271675695e-05, "loss": 2.6962, "step": 7058 }, { "epoch": 0.5788367922111196, "grad_norm": 0.4140949249267578, "learning_rate": 4.1160395417106026e-05, "loss": 2.7505, "step": 7060 }, { "epoch": 0.5790007686395081, "grad_norm": 0.426513671875, "learning_rate": 4.113371071672005e-05, "loss": 2.7943, "step": 7062 }, { "epoch": 0.5791647450678965, "grad_norm": 0.43574535846710205, "learning_rate": 4.110702862344557e-05, "loss": 2.7699, "step": 7064 }, { "epoch": 0.5793287214962849, "grad_norm": 0.3993785083293915, "learning_rate": 4.108034914512845e-05, "loss": 2.731, "step": 7066 }, { "epoch": 0.5794926979246733, "grad_norm": 0.4101898670196533, "learning_rate": 4.105367228961369e-05, "loss": 2.776, "step": 7068 }, { "epoch": 0.5796566743530618, "grad_norm": 0.41295358538627625, "learning_rate": 4.102699806474555e-05, "loss": 2.7528, "step": 7070 }, { "epoch": 0.5798206507814502, "grad_norm": 0.39692431688308716, "learning_rate": 4.100032647836756e-05, "loss": 2.7873, "step": 7072 }, { "epoch": 0.5799846272098386, "grad_norm": 0.4573379456996918, "learning_rate": 4.097365753832241e-05, "loss": 2.7406, "step": 7074 }, { "epoch": 0.580148603638227, "grad_norm": 0.45833566784858704, "learning_rate": 4.094699125245204e-05, "loss": 2.7557, "step": 7076 }, { "epoch": 0.5803125800666155, "grad_norm": 0.4373979866504669, "learning_rate": 4.092032762859762e-05, "loss": 2.7181, "step": 7078 }, { "epoch": 0.5804765564950038, "grad_norm": 0.3798002004623413, "learning_rate": 4.089366667459952e-05, "loss": 2.7446, "step": 7080 }, { "epoch": 0.5806405329233922, "grad_norm": 0.3701511323451996, "learning_rate": 4.086700839829731e-05, "loss": 2.7273, "step": 7082 }, { "epoch": 0.5808045093517806, "grad_norm": 0.40236371755599976, "learning_rate": 4.084035280752983e-05, "loss": 2.7816, "step": 7084 }, { "epoch": 0.5809684857801691, "grad_norm": 0.4041323959827423, "learning_rate": 4.0813699910135075e-05, "loss": 2.6938, "step": 7086 }, { "epoch": 0.5811324622085575, "grad_norm": 0.4038769602775574, "learning_rate": 4.0787049713950256e-05, "loss": 2.7723, "step": 7088 }, { "epoch": 0.5812964386369459, "grad_norm": 0.403374582529068, "learning_rate": 4.076040222681183e-05, "loss": 2.7374, "step": 7090 }, { "epoch": 0.5814604150653344, "grad_norm": 0.4049570858478546, "learning_rate": 4.073375745655541e-05, "loss": 2.7626, "step": 7092 }, { "epoch": 0.5816243914937228, "grad_norm": 0.41250374913215637, "learning_rate": 4.070711541101581e-05, "loss": 2.8086, "step": 7094 }, { "epoch": 0.5817883679221112, "grad_norm": 0.3828825056552887, "learning_rate": 4.06804760980271e-05, "loss": 2.7755, "step": 7096 }, { "epoch": 0.5819523443504996, "grad_norm": 0.3960285186767578, "learning_rate": 4.0653839525422486e-05, "loss": 2.7239, "step": 7098 }, { "epoch": 0.5821163207788881, "grad_norm": 0.3921143114566803, "learning_rate": 4.062720570103439e-05, "loss": 2.7552, "step": 7100 }, { "epoch": 0.5822802972072765, "grad_norm": 0.4139832556247711, "learning_rate": 4.0600574632694426e-05, "loss": 2.7164, "step": 7102 }, { "epoch": 0.5824442736356649, "grad_norm": 0.4004044830799103, "learning_rate": 4.0573946328233406e-05, "loss": 2.7205, "step": 7104 }, { "epoch": 0.5826082500640533, "grad_norm": 0.4060121774673462, "learning_rate": 4.054732079548129e-05, "loss": 2.7587, "step": 7106 }, { "epoch": 0.5827722264924418, "grad_norm": 0.40428370237350464, "learning_rate": 4.052069804226729e-05, "loss": 2.7767, "step": 7108 }, { "epoch": 0.5829362029208301, "grad_norm": 0.3867088854312897, "learning_rate": 4.049407807641975e-05, "loss": 2.7074, "step": 7110 }, { "epoch": 0.5831001793492185, "grad_norm": 0.4056602120399475, "learning_rate": 4.0467460905766194e-05, "loss": 2.7435, "step": 7112 }, { "epoch": 0.5832641557776069, "grad_norm": 0.39436739683151245, "learning_rate": 4.044084653813337e-05, "loss": 2.7493, "step": 7114 }, { "epoch": 0.5834281322059954, "grad_norm": 0.40216735005378723, "learning_rate": 4.041423498134715e-05, "loss": 2.7319, "step": 7116 }, { "epoch": 0.5835921086343838, "grad_norm": 0.3866954445838928, "learning_rate": 4.038762624323259e-05, "loss": 2.718, "step": 7118 }, { "epoch": 0.5837560850627722, "grad_norm": 0.3950190544128418, "learning_rate": 4.0361020331613944e-05, "loss": 2.8338, "step": 7120 }, { "epoch": 0.5839200614911606, "grad_norm": 0.392497181892395, "learning_rate": 4.033441725431462e-05, "loss": 2.7203, "step": 7122 }, { "epoch": 0.5840840379195491, "grad_norm": 0.4107820391654968, "learning_rate": 4.030781701915718e-05, "loss": 2.7361, "step": 7124 }, { "epoch": 0.5842480143479375, "grad_norm": 0.4411157965660095, "learning_rate": 4.028121963396337e-05, "loss": 2.718, "step": 7126 }, { "epoch": 0.5844119907763259, "grad_norm": 0.4278470277786255, "learning_rate": 4.0254625106554076e-05, "loss": 2.6954, "step": 7128 }, { "epoch": 0.5845759672047143, "grad_norm": 0.44073304533958435, "learning_rate": 4.0228033444749345e-05, "loss": 2.6976, "step": 7130 }, { "epoch": 0.5847399436331028, "grad_norm": 0.41985079646110535, "learning_rate": 4.020144465636844e-05, "loss": 2.728, "step": 7132 }, { "epoch": 0.5849039200614912, "grad_norm": 0.4166056215763092, "learning_rate": 4.017485874922969e-05, "loss": 2.7258, "step": 7134 }, { "epoch": 0.5850678964898796, "grad_norm": 0.45667922496795654, "learning_rate": 4.014827573115062e-05, "loss": 2.7691, "step": 7136 }, { "epoch": 0.585231872918268, "grad_norm": 0.4428565204143524, "learning_rate": 4.012169560994793e-05, "loss": 2.7454, "step": 7138 }, { "epoch": 0.5853958493466564, "grad_norm": 0.4104422628879547, "learning_rate": 4.009511839343742e-05, "loss": 2.7854, "step": 7140 }, { "epoch": 0.5855598257750448, "grad_norm": 0.3911503255367279, "learning_rate": 4.0068544089434067e-05, "loss": 2.7899, "step": 7142 }, { "epoch": 0.5857238022034332, "grad_norm": 0.4035918414592743, "learning_rate": 4.004197270575198e-05, "loss": 2.7087, "step": 7144 }, { "epoch": 0.5858877786318216, "grad_norm": 0.40716925263404846, "learning_rate": 4.001540425020441e-05, "loss": 2.7271, "step": 7146 }, { "epoch": 0.5860517550602101, "grad_norm": 0.4005942940711975, "learning_rate": 3.998883873060375e-05, "loss": 2.7669, "step": 7148 }, { "epoch": 0.5862157314885985, "grad_norm": 0.37836208939552307, "learning_rate": 3.996227615476154e-05, "loss": 2.754, "step": 7150 }, { "epoch": 0.5863797079169869, "grad_norm": 0.41903597116470337, "learning_rate": 3.993571653048843e-05, "loss": 2.6871, "step": 7152 }, { "epoch": 0.5865436843453754, "grad_norm": 0.3899541199207306, "learning_rate": 3.9909159865594206e-05, "loss": 2.7085, "step": 7154 }, { "epoch": 0.5867076607737638, "grad_norm": 0.4074578881263733, "learning_rate": 3.988260616788781e-05, "loss": 2.7907, "step": 7156 }, { "epoch": 0.5868716372021522, "grad_norm": 0.4121086299419403, "learning_rate": 3.9856055445177305e-05, "loss": 2.7571, "step": 7158 }, { "epoch": 0.5870356136305406, "grad_norm": 0.39414846897125244, "learning_rate": 3.982950770526982e-05, "loss": 2.7257, "step": 7160 }, { "epoch": 0.5871995900589291, "grad_norm": 0.3592352569103241, "learning_rate": 3.980296295597172e-05, "loss": 2.691, "step": 7162 }, { "epoch": 0.5873635664873175, "grad_norm": 0.4083999693393707, "learning_rate": 3.9776421205088386e-05, "loss": 2.7582, "step": 7164 }, { "epoch": 0.5875275429157059, "grad_norm": 0.410372257232666, "learning_rate": 3.9749882460424357e-05, "loss": 2.774, "step": 7166 }, { "epoch": 0.5876915193440942, "grad_norm": 0.41757383942604065, "learning_rate": 3.9723346729783305e-05, "loss": 2.7065, "step": 7168 }, { "epoch": 0.5878554957724828, "grad_norm": 0.44156357645988464, "learning_rate": 3.9696814020967996e-05, "loss": 2.6664, "step": 7170 }, { "epoch": 0.5880194722008711, "grad_norm": 0.43870604038238525, "learning_rate": 3.967028434178028e-05, "loss": 2.7485, "step": 7172 }, { "epoch": 0.5881834486292595, "grad_norm": 0.3917291760444641, "learning_rate": 3.964375770002121e-05, "loss": 2.7919, "step": 7174 }, { "epoch": 0.5883474250576479, "grad_norm": 0.41526177525520325, "learning_rate": 3.961723410349083e-05, "loss": 2.7659, "step": 7176 }, { "epoch": 0.5885114014860364, "grad_norm": 0.39451149106025696, "learning_rate": 3.959071355998834e-05, "loss": 2.7591, "step": 7178 }, { "epoch": 0.5886753779144248, "grad_norm": 0.3906865417957306, "learning_rate": 3.9564196077312084e-05, "loss": 2.7009, "step": 7180 }, { "epoch": 0.5888393543428132, "grad_norm": 0.4263087213039398, "learning_rate": 3.953768166325944e-05, "loss": 2.7435, "step": 7182 }, { "epoch": 0.5890033307712016, "grad_norm": 0.41483640670776367, "learning_rate": 3.951117032562689e-05, "loss": 2.7062, "step": 7184 }, { "epoch": 0.5891673071995901, "grad_norm": 0.3860895335674286, "learning_rate": 3.948466207221007e-05, "loss": 2.7711, "step": 7186 }, { "epoch": 0.5893312836279785, "grad_norm": 0.4068519175052643, "learning_rate": 3.945815691080365e-05, "loss": 2.703, "step": 7188 }, { "epoch": 0.5894952600563669, "grad_norm": 0.4312509000301361, "learning_rate": 3.9431654849201404e-05, "loss": 2.7669, "step": 7190 }, { "epoch": 0.5896592364847553, "grad_norm": 0.3803481459617615, "learning_rate": 3.940515589519622e-05, "loss": 2.7223, "step": 7192 }, { "epoch": 0.5898232129131438, "grad_norm": 0.37895467877388, "learning_rate": 3.9378660056580046e-05, "loss": 2.7479, "step": 7194 }, { "epoch": 0.5899871893415322, "grad_norm": 0.4233595132827759, "learning_rate": 3.9352167341143884e-05, "loss": 2.732, "step": 7196 }, { "epoch": 0.5901511657699205, "grad_norm": 0.40764448046684265, "learning_rate": 3.932567775667792e-05, "loss": 2.7261, "step": 7198 }, { "epoch": 0.5903151421983089, "grad_norm": 0.4058440327644348, "learning_rate": 3.9299191310971314e-05, "loss": 2.7445, "step": 7200 }, { "epoch": 0.5904791186266974, "grad_norm": 0.38775965571403503, "learning_rate": 3.927270801181234e-05, "loss": 2.768, "step": 7202 }, { "epoch": 0.5906430950550858, "grad_norm": 0.36376839876174927, "learning_rate": 3.924622786698837e-05, "loss": 2.7043, "step": 7204 }, { "epoch": 0.5908070714834742, "grad_norm": 0.39008110761642456, "learning_rate": 3.921975088428582e-05, "loss": 2.7474, "step": 7206 }, { "epoch": 0.5909710479118627, "grad_norm": 0.3973616361618042, "learning_rate": 3.9193277071490164e-05, "loss": 2.664, "step": 7208 }, { "epoch": 0.5911350243402511, "grad_norm": 0.3894846439361572, "learning_rate": 3.9166806436386e-05, "loss": 2.7569, "step": 7210 }, { "epoch": 0.5912990007686395, "grad_norm": 0.3805749714374542, "learning_rate": 3.9140338986756954e-05, "loss": 2.7333, "step": 7212 }, { "epoch": 0.5914629771970279, "grad_norm": 0.4085012674331665, "learning_rate": 3.911387473038568e-05, "loss": 2.7184, "step": 7214 }, { "epoch": 0.5916269536254164, "grad_norm": 0.3935090899467468, "learning_rate": 3.908741367505397e-05, "loss": 2.7763, "step": 7216 }, { "epoch": 0.5917909300538048, "grad_norm": 0.3762976825237274, "learning_rate": 3.906095582854262e-05, "loss": 2.7076, "step": 7218 }, { "epoch": 0.5919549064821932, "grad_norm": 0.39394861459732056, "learning_rate": 3.9034501198631465e-05, "loss": 2.7718, "step": 7220 }, { "epoch": 0.5921188829105816, "grad_norm": 0.39268386363983154, "learning_rate": 3.900804979309949e-05, "loss": 2.726, "step": 7222 }, { "epoch": 0.5922828593389701, "grad_norm": 0.40367162227630615, "learning_rate": 3.898160161972463e-05, "loss": 2.7105, "step": 7224 }, { "epoch": 0.5924468357673585, "grad_norm": 0.36791446805000305, "learning_rate": 3.89551566862839e-05, "loss": 2.6979, "step": 7226 }, { "epoch": 0.5926108121957468, "grad_norm": 0.37786632776260376, "learning_rate": 3.89287150005534e-05, "loss": 2.7642, "step": 7228 }, { "epoch": 0.5927747886241352, "grad_norm": 0.36827540397644043, "learning_rate": 3.890227657030823e-05, "loss": 2.7612, "step": 7230 }, { "epoch": 0.5929387650525237, "grad_norm": 0.38697636127471924, "learning_rate": 3.8875841403322534e-05, "loss": 2.7035, "step": 7232 }, { "epoch": 0.5931027414809121, "grad_norm": 0.3698570132255554, "learning_rate": 3.8849409507369536e-05, "loss": 2.7636, "step": 7234 }, { "epoch": 0.5932667179093005, "grad_norm": 0.3766981363296509, "learning_rate": 3.882298089022147e-05, "loss": 2.7442, "step": 7236 }, { "epoch": 0.5934306943376889, "grad_norm": 0.3901519477367401, "learning_rate": 3.8796555559649594e-05, "loss": 2.765, "step": 7238 }, { "epoch": 0.5935946707660774, "grad_norm": 0.38234102725982666, "learning_rate": 3.8770133523424234e-05, "loss": 2.6888, "step": 7240 }, { "epoch": 0.5937586471944658, "grad_norm": 0.3951614797115326, "learning_rate": 3.874371478931471e-05, "loss": 2.7455, "step": 7242 }, { "epoch": 0.5939226236228542, "grad_norm": 0.393010675907135, "learning_rate": 3.8717299365089374e-05, "loss": 2.7469, "step": 7244 }, { "epoch": 0.5940866000512426, "grad_norm": 0.4106244146823883, "learning_rate": 3.869088725851566e-05, "loss": 2.7471, "step": 7246 }, { "epoch": 0.5942505764796311, "grad_norm": 0.40092745423316956, "learning_rate": 3.866447847735997e-05, "loss": 2.7754, "step": 7248 }, { "epoch": 0.5944145529080195, "grad_norm": 0.3768394887447357, "learning_rate": 3.863807302938771e-05, "loss": 2.7274, "step": 7250 }, { "epoch": 0.5945785293364079, "grad_norm": 0.4089185893535614, "learning_rate": 3.861167092236339e-05, "loss": 2.7218, "step": 7252 }, { "epoch": 0.5947425057647963, "grad_norm": 0.4022778868675232, "learning_rate": 3.8585272164050476e-05, "loss": 2.7384, "step": 7254 }, { "epoch": 0.5949064821931848, "grad_norm": 0.4059952199459076, "learning_rate": 3.8558876762211436e-05, "loss": 2.7447, "step": 7256 }, { "epoch": 0.5950704586215732, "grad_norm": 0.39446863532066345, "learning_rate": 3.8532484724607786e-05, "loss": 2.7583, "step": 7258 }, { "epoch": 0.5952344350499615, "grad_norm": 0.4041154682636261, "learning_rate": 3.850609605900007e-05, "loss": 2.7152, "step": 7260 }, { "epoch": 0.59539841147835, "grad_norm": 0.39531728625297546, "learning_rate": 3.847971077314777e-05, "loss": 2.7415, "step": 7262 }, { "epoch": 0.5955623879067384, "grad_norm": 0.42486509680747986, "learning_rate": 3.845332887480946e-05, "loss": 2.7512, "step": 7264 }, { "epoch": 0.5957263643351268, "grad_norm": 0.39751359820365906, "learning_rate": 3.842695037174264e-05, "loss": 2.7359, "step": 7266 }, { "epoch": 0.5958903407635152, "grad_norm": 0.3758830428123474, "learning_rate": 3.8400575271703854e-05, "loss": 2.7356, "step": 7268 }, { "epoch": 0.5960543171919037, "grad_norm": 0.3784623146057129, "learning_rate": 3.837420358244866e-05, "loss": 2.7445, "step": 7270 }, { "epoch": 0.5962182936202921, "grad_norm": 0.38142549991607666, "learning_rate": 3.834783531173157e-05, "loss": 2.7066, "step": 7272 }, { "epoch": 0.5963822700486805, "grad_norm": 0.39958712458610535, "learning_rate": 3.8321470467306105e-05, "loss": 2.7698, "step": 7274 }, { "epoch": 0.5965462464770689, "grad_norm": 0.3814118802547455, "learning_rate": 3.829510905692482e-05, "loss": 2.7364, "step": 7276 }, { "epoch": 0.5967102229054574, "grad_norm": 0.386503130197525, "learning_rate": 3.8268751088339195e-05, "loss": 2.7719, "step": 7278 }, { "epoch": 0.5968741993338458, "grad_norm": 0.3756306767463684, "learning_rate": 3.8242396569299735e-05, "loss": 2.7316, "step": 7280 }, { "epoch": 0.5970381757622342, "grad_norm": 0.37826624512672424, "learning_rate": 3.821604550755593e-05, "loss": 2.7203, "step": 7282 }, { "epoch": 0.5972021521906226, "grad_norm": 0.3793233633041382, "learning_rate": 3.818969791085624e-05, "loss": 2.7435, "step": 7284 }, { "epoch": 0.5973661286190111, "grad_norm": 0.3833853304386139, "learning_rate": 3.816335378694812e-05, "loss": 2.7679, "step": 7286 }, { "epoch": 0.5975301050473995, "grad_norm": 0.35013169050216675, "learning_rate": 3.813701314357801e-05, "loss": 2.693, "step": 7288 }, { "epoch": 0.5976940814757878, "grad_norm": 0.38554146885871887, "learning_rate": 3.81106759884913e-05, "loss": 2.7149, "step": 7290 }, { "epoch": 0.5978580579041762, "grad_norm": 0.39514926075935364, "learning_rate": 3.808434232943234e-05, "loss": 2.7638, "step": 7292 }, { "epoch": 0.5980220343325647, "grad_norm": 0.4564327299594879, "learning_rate": 3.805801217414454e-05, "loss": 2.7505, "step": 7294 }, { "epoch": 0.5981860107609531, "grad_norm": 0.41196030378341675, "learning_rate": 3.8031685530370184e-05, "loss": 2.7307, "step": 7296 }, { "epoch": 0.5983499871893415, "grad_norm": 0.3923788368701935, "learning_rate": 3.8005362405850554e-05, "loss": 2.7429, "step": 7298 }, { "epoch": 0.5985139636177299, "grad_norm": 0.40541306138038635, "learning_rate": 3.7979042808325925e-05, "loss": 2.7411, "step": 7300 }, { "epoch": 0.5986779400461184, "grad_norm": 0.37198057770729065, "learning_rate": 3.795272674553551e-05, "loss": 2.7234, "step": 7302 }, { "epoch": 0.5988419164745068, "grad_norm": 0.4197216033935547, "learning_rate": 3.792641422521746e-05, "loss": 2.7148, "step": 7304 }, { "epoch": 0.5990058929028952, "grad_norm": 0.4193935692310333, "learning_rate": 3.7900105255108956e-05, "loss": 2.7489, "step": 7306 }, { "epoch": 0.5991698693312836, "grad_norm": 0.434123158454895, "learning_rate": 3.787379984294605e-05, "loss": 2.7359, "step": 7308 }, { "epoch": 0.5993338457596721, "grad_norm": 0.42082515358924866, "learning_rate": 3.784749799646377e-05, "loss": 2.7424, "step": 7310 }, { "epoch": 0.5994978221880605, "grad_norm": 0.4002670347690582, "learning_rate": 3.782119972339616e-05, "loss": 2.7249, "step": 7312 }, { "epoch": 0.5996617986164489, "grad_norm": 0.3787383735179901, "learning_rate": 3.779490503147615e-05, "loss": 2.7341, "step": 7314 }, { "epoch": 0.5998257750448373, "grad_norm": 0.3705497682094574, "learning_rate": 3.776861392843561e-05, "loss": 2.7528, "step": 7316 }, { "epoch": 0.5999897514732258, "grad_norm": 0.38035187125205994, "learning_rate": 3.774232642200541e-05, "loss": 2.7124, "step": 7318 }, { "epoch": 0.6001537279016141, "grad_norm": 0.41400671005249023, "learning_rate": 3.771604251991532e-05, "loss": 2.7601, "step": 7320 }, { "epoch": 0.6003177043300025, "grad_norm": 0.41931360960006714, "learning_rate": 3.768976222989402e-05, "loss": 2.7184, "step": 7322 }, { "epoch": 0.600481680758391, "grad_norm": 0.41194209456443787, "learning_rate": 3.766348555966923e-05, "loss": 2.6892, "step": 7324 }, { "epoch": 0.6006456571867794, "grad_norm": 0.4093153178691864, "learning_rate": 3.763721251696751e-05, "loss": 2.7387, "step": 7326 }, { "epoch": 0.6008096336151678, "grad_norm": 0.44898512959480286, "learning_rate": 3.761094310951439e-05, "loss": 2.7957, "step": 7328 }, { "epoch": 0.6009736100435562, "grad_norm": 0.4230307340621948, "learning_rate": 3.758467734503433e-05, "loss": 2.7205, "step": 7330 }, { "epoch": 0.6011375864719447, "grad_norm": 0.42397361993789673, "learning_rate": 3.755841523125072e-05, "loss": 2.6792, "step": 7332 }, { "epoch": 0.6013015629003331, "grad_norm": 0.38022592663764954, "learning_rate": 3.753215677588584e-05, "loss": 2.7582, "step": 7334 }, { "epoch": 0.6014655393287215, "grad_norm": 0.4053349494934082, "learning_rate": 3.7505901986660974e-05, "loss": 2.6986, "step": 7336 }, { "epoch": 0.6016295157571099, "grad_norm": 0.39706191420555115, "learning_rate": 3.747965087129627e-05, "loss": 2.6879, "step": 7338 }, { "epoch": 0.6017934921854984, "grad_norm": 0.39096325635910034, "learning_rate": 3.745340343751077e-05, "loss": 2.758, "step": 7340 }, { "epoch": 0.6019574686138868, "grad_norm": 0.3869808614253998, "learning_rate": 3.742715969302252e-05, "loss": 2.7402, "step": 7342 }, { "epoch": 0.6021214450422752, "grad_norm": 0.4089089035987854, "learning_rate": 3.74009196455484e-05, "loss": 2.6903, "step": 7344 }, { "epoch": 0.6022854214706636, "grad_norm": 0.41569820046424866, "learning_rate": 3.7374683302804236e-05, "loss": 2.7006, "step": 7346 }, { "epoch": 0.602449397899052, "grad_norm": 0.39513155817985535, "learning_rate": 3.734845067250479e-05, "loss": 2.7673, "step": 7348 }, { "epoch": 0.6026133743274404, "grad_norm": 0.41469231247901917, "learning_rate": 3.732222176236369e-05, "loss": 2.7501, "step": 7350 }, { "epoch": 0.6027773507558288, "grad_norm": 0.4159885346889496, "learning_rate": 3.729599658009346e-05, "loss": 2.748, "step": 7352 }, { "epoch": 0.6029413271842172, "grad_norm": 0.4074746370315552, "learning_rate": 3.7269775133405594e-05, "loss": 2.745, "step": 7354 }, { "epoch": 0.6031053036126057, "grad_norm": 0.40688809752464294, "learning_rate": 3.7243557430010424e-05, "loss": 2.7292, "step": 7356 }, { "epoch": 0.6032692800409941, "grad_norm": 0.38384461402893066, "learning_rate": 3.7217343477617184e-05, "loss": 2.736, "step": 7358 }, { "epoch": 0.6034332564693825, "grad_norm": 0.3964248299598694, "learning_rate": 3.719113328393408e-05, "loss": 2.698, "step": 7360 }, { "epoch": 0.6035972328977709, "grad_norm": 0.43690356612205505, "learning_rate": 3.7164926856668115e-05, "loss": 2.7602, "step": 7362 }, { "epoch": 0.6037612093261594, "grad_norm": 0.40876147150993347, "learning_rate": 3.7138724203525215e-05, "loss": 2.7014, "step": 7364 }, { "epoch": 0.6039251857545478, "grad_norm": 0.42225024104118347, "learning_rate": 3.7112525332210255e-05, "loss": 2.7621, "step": 7366 }, { "epoch": 0.6040891621829362, "grad_norm": 0.3870079517364502, "learning_rate": 3.708633025042694e-05, "loss": 2.7207, "step": 7368 }, { "epoch": 0.6042531386113246, "grad_norm": 0.3651062548160553, "learning_rate": 3.7060138965877835e-05, "loss": 2.704, "step": 7370 }, { "epoch": 0.6044171150397131, "grad_norm": 0.3923655152320862, "learning_rate": 3.703395148626447e-05, "loss": 2.7833, "step": 7372 }, { "epoch": 0.6045810914681015, "grad_norm": 0.40090101957321167, "learning_rate": 3.7007767819287195e-05, "loss": 2.747, "step": 7374 }, { "epoch": 0.6047450678964899, "grad_norm": 0.3791881203651428, "learning_rate": 3.698158797264524e-05, "loss": 2.7639, "step": 7376 }, { "epoch": 0.6049090443248784, "grad_norm": 0.4073001444339752, "learning_rate": 3.6955411954036755e-05, "loss": 2.707, "step": 7378 }, { "epoch": 0.6050730207532667, "grad_norm": 0.3848922550678253, "learning_rate": 3.692923977115872e-05, "loss": 2.7157, "step": 7380 }, { "epoch": 0.6052369971816551, "grad_norm": 0.3976440727710724, "learning_rate": 3.6903071431706994e-05, "loss": 2.7507, "step": 7382 }, { "epoch": 0.6054009736100435, "grad_norm": 0.431142121553421, "learning_rate": 3.687690694337634e-05, "loss": 2.7576, "step": 7384 }, { "epoch": 0.605564950038432, "grad_norm": 0.40304774045944214, "learning_rate": 3.685074631386036e-05, "loss": 2.7305, "step": 7386 }, { "epoch": 0.6057289264668204, "grad_norm": 0.3595426678657532, "learning_rate": 3.6824589550851495e-05, "loss": 2.7312, "step": 7388 }, { "epoch": 0.6058929028952088, "grad_norm": 0.3594333827495575, "learning_rate": 3.679843666204113e-05, "loss": 2.7613, "step": 7390 }, { "epoch": 0.6060568793235972, "grad_norm": 0.3855232000350952, "learning_rate": 3.677228765511943e-05, "loss": 2.7281, "step": 7392 }, { "epoch": 0.6062208557519857, "grad_norm": 0.41874638199806213, "learning_rate": 3.6746142537775443e-05, "loss": 2.7317, "step": 7394 }, { "epoch": 0.6063848321803741, "grad_norm": 0.3978760838508606, "learning_rate": 3.672000131769709e-05, "loss": 2.7232, "step": 7396 }, { "epoch": 0.6065488086087625, "grad_norm": 0.38561245799064636, "learning_rate": 3.6693864002571144e-05, "loss": 2.746, "step": 7398 }, { "epoch": 0.6067127850371509, "grad_norm": 0.3927624821662903, "learning_rate": 3.66677306000832e-05, "loss": 2.7364, "step": 7400 }, { "epoch": 0.6068767614655394, "grad_norm": 0.3884304463863373, "learning_rate": 3.664160111791775e-05, "loss": 2.7298, "step": 7402 }, { "epoch": 0.6070407378939278, "grad_norm": 0.4252575933933258, "learning_rate": 3.6615475563758086e-05, "loss": 2.707, "step": 7404 }, { "epoch": 0.6072047143223162, "grad_norm": 0.40555331110954285, "learning_rate": 3.6589353945286354e-05, "loss": 2.6956, "step": 7406 }, { "epoch": 0.6073686907507045, "grad_norm": 0.39419540762901306, "learning_rate": 3.6563236270183596e-05, "loss": 2.7786, "step": 7408 }, { "epoch": 0.607532667179093, "grad_norm": 0.3671490550041199, "learning_rate": 3.653712254612962e-05, "loss": 2.7485, "step": 7410 }, { "epoch": 0.6076966436074814, "grad_norm": 0.3971266448497772, "learning_rate": 3.6511012780803105e-05, "loss": 2.7043, "step": 7412 }, { "epoch": 0.6078606200358698, "grad_norm": 0.38373613357543945, "learning_rate": 3.648490698188159e-05, "loss": 2.7835, "step": 7414 }, { "epoch": 0.6080245964642582, "grad_norm": 0.3933662474155426, "learning_rate": 3.645880515704141e-05, "loss": 2.6655, "step": 7416 }, { "epoch": 0.6081885728926467, "grad_norm": 0.3658972978591919, "learning_rate": 3.6432707313957736e-05, "loss": 2.7397, "step": 7418 }, { "epoch": 0.6083525493210351, "grad_norm": 0.37315210700035095, "learning_rate": 3.640661346030459e-05, "loss": 2.7642, "step": 7420 }, { "epoch": 0.6085165257494235, "grad_norm": 0.4116625487804413, "learning_rate": 3.638052360375481e-05, "loss": 2.6983, "step": 7422 }, { "epoch": 0.6086805021778119, "grad_norm": 0.4126829206943512, "learning_rate": 3.635443775198004e-05, "loss": 2.7407, "step": 7424 }, { "epoch": 0.6088444786062004, "grad_norm": 0.40081050992012024, "learning_rate": 3.6328355912650794e-05, "loss": 2.721, "step": 7426 }, { "epoch": 0.6090084550345888, "grad_norm": 0.39633798599243164, "learning_rate": 3.630227809343634e-05, "loss": 2.7672, "step": 7428 }, { "epoch": 0.6091724314629772, "grad_norm": 0.36122927069664, "learning_rate": 3.627620430200481e-05, "loss": 2.7668, "step": 7430 }, { "epoch": 0.6093364078913656, "grad_norm": 0.38888874650001526, "learning_rate": 3.625013454602316e-05, "loss": 2.7688, "step": 7432 }, { "epoch": 0.6095003843197541, "grad_norm": 0.3885965645313263, "learning_rate": 3.6224068833157135e-05, "loss": 2.7836, "step": 7434 }, { "epoch": 0.6096643607481425, "grad_norm": 0.41673019528388977, "learning_rate": 3.619800717107126e-05, "loss": 2.7014, "step": 7436 }, { "epoch": 0.6098283371765308, "grad_norm": 0.4096967875957489, "learning_rate": 3.6171949567428956e-05, "loss": 2.7139, "step": 7438 }, { "epoch": 0.6099923136049193, "grad_norm": 0.3758758306503296, "learning_rate": 3.614589602989238e-05, "loss": 2.6619, "step": 7440 }, { "epoch": 0.6101562900333077, "grad_norm": 0.4018336832523346, "learning_rate": 3.6119846566122505e-05, "loss": 2.6948, "step": 7442 }, { "epoch": 0.6103202664616961, "grad_norm": 0.402338445186615, "learning_rate": 3.609380118377913e-05, "loss": 2.768, "step": 7444 }, { "epoch": 0.6104842428900845, "grad_norm": 0.3775213360786438, "learning_rate": 3.606775989052083e-05, "loss": 2.7255, "step": 7446 }, { "epoch": 0.610648219318473, "grad_norm": 0.4092825651168823, "learning_rate": 3.6041722694004964e-05, "loss": 2.7224, "step": 7448 }, { "epoch": 0.6108121957468614, "grad_norm": 0.3705921769142151, "learning_rate": 3.6015689601887746e-05, "loss": 2.667, "step": 7450 }, { "epoch": 0.6109761721752498, "grad_norm": 0.39161479473114014, "learning_rate": 3.598966062182414e-05, "loss": 2.7119, "step": 7452 }, { "epoch": 0.6111401486036382, "grad_norm": 0.3508928120136261, "learning_rate": 3.596363576146787e-05, "loss": 2.7114, "step": 7454 }, { "epoch": 0.6113041250320267, "grad_norm": 0.3898743987083435, "learning_rate": 3.5937615028471536e-05, "loss": 2.7308, "step": 7456 }, { "epoch": 0.6114681014604151, "grad_norm": 0.40665403008461, "learning_rate": 3.591159843048645e-05, "loss": 2.6795, "step": 7458 }, { "epoch": 0.6116320778888035, "grad_norm": 0.39401838183403015, "learning_rate": 3.5885585975162706e-05, "loss": 2.7645, "step": 7460 }, { "epoch": 0.6117960543171919, "grad_norm": 0.39833781123161316, "learning_rate": 3.585957767014925e-05, "loss": 2.7333, "step": 7462 }, { "epoch": 0.6119600307455804, "grad_norm": 0.3788768947124481, "learning_rate": 3.583357352309374e-05, "loss": 2.7235, "step": 7464 }, { "epoch": 0.6121240071739688, "grad_norm": 0.3793696165084839, "learning_rate": 3.580757354164264e-05, "loss": 2.6877, "step": 7466 }, { "epoch": 0.6122879836023571, "grad_norm": 0.4045318067073822, "learning_rate": 3.578157773344118e-05, "loss": 2.6522, "step": 7468 }, { "epoch": 0.6124519600307455, "grad_norm": 0.426973432302475, "learning_rate": 3.5755586106133366e-05, "loss": 2.6703, "step": 7470 }, { "epoch": 0.612615936459134, "grad_norm": 0.4132937788963318, "learning_rate": 3.572959866736196e-05, "loss": 2.746, "step": 7472 }, { "epoch": 0.6127799128875224, "grad_norm": 0.38171377778053284, "learning_rate": 3.570361542476855e-05, "loss": 2.7185, "step": 7474 }, { "epoch": 0.6129438893159108, "grad_norm": 0.40772563219070435, "learning_rate": 3.56776363859934e-05, "loss": 2.7168, "step": 7476 }, { "epoch": 0.6131078657442992, "grad_norm": 0.4192030131816864, "learning_rate": 3.56516615586756e-05, "loss": 2.7037, "step": 7478 }, { "epoch": 0.6132718421726877, "grad_norm": 0.3770076632499695, "learning_rate": 3.5625690950453006e-05, "loss": 2.8116, "step": 7480 }, { "epoch": 0.6134358186010761, "grad_norm": 0.39449840784072876, "learning_rate": 3.559972456896221e-05, "loss": 2.7704, "step": 7482 }, { "epoch": 0.6135997950294645, "grad_norm": 0.39838895201683044, "learning_rate": 3.557376242183853e-05, "loss": 2.7817, "step": 7484 }, { "epoch": 0.6137637714578529, "grad_norm": 0.3995193541049957, "learning_rate": 3.554780451671612e-05, "loss": 2.6982, "step": 7486 }, { "epoch": 0.6139277478862414, "grad_norm": 0.3879450857639313, "learning_rate": 3.552185086122782e-05, "loss": 2.7279, "step": 7488 }, { "epoch": 0.6140917243146298, "grad_norm": 0.39906418323516846, "learning_rate": 3.549590146300524e-05, "loss": 2.698, "step": 7490 }, { "epoch": 0.6142557007430182, "grad_norm": 0.40238451957702637, "learning_rate": 3.546995632967875e-05, "loss": 2.7944, "step": 7492 }, { "epoch": 0.6144196771714067, "grad_norm": 0.3660317063331604, "learning_rate": 3.544401546887745e-05, "loss": 2.7049, "step": 7494 }, { "epoch": 0.614583653599795, "grad_norm": 0.39529040455818176, "learning_rate": 3.5418078888229166e-05, "loss": 2.7674, "step": 7496 }, { "epoch": 0.6147476300281834, "grad_norm": 0.41775986552238464, "learning_rate": 3.539214659536053e-05, "loss": 2.7501, "step": 7498 }, { "epoch": 0.6149116064565718, "grad_norm": 0.37571340799331665, "learning_rate": 3.536621859789685e-05, "loss": 2.7063, "step": 7500 }, { "epoch": 0.6150755828849603, "grad_norm": 0.37494418025016785, "learning_rate": 3.534029490346217e-05, "loss": 2.7466, "step": 7502 }, { "epoch": 0.6152395593133487, "grad_norm": 0.396066278219223, "learning_rate": 3.5314375519679345e-05, "loss": 2.7053, "step": 7504 }, { "epoch": 0.6154035357417371, "grad_norm": 0.4173499345779419, "learning_rate": 3.528846045416987e-05, "loss": 2.7273, "step": 7506 }, { "epoch": 0.6155675121701255, "grad_norm": 0.41211748123168945, "learning_rate": 3.5262549714554e-05, "loss": 2.6959, "step": 7508 }, { "epoch": 0.615731488598514, "grad_norm": 0.3835853934288025, "learning_rate": 3.523664330845077e-05, "loss": 2.7235, "step": 7510 }, { "epoch": 0.6158954650269024, "grad_norm": 0.3758532702922821, "learning_rate": 3.5210741243477876e-05, "loss": 2.6909, "step": 7512 }, { "epoch": 0.6160594414552908, "grad_norm": 0.3795211613178253, "learning_rate": 3.5184843527251745e-05, "loss": 2.7192, "step": 7514 }, { "epoch": 0.6162234178836792, "grad_norm": 0.3767675459384918, "learning_rate": 3.5158950167387564e-05, "loss": 2.7601, "step": 7516 }, { "epoch": 0.6163873943120677, "grad_norm": 0.39331692457199097, "learning_rate": 3.513306117149919e-05, "loss": 2.7386, "step": 7518 }, { "epoch": 0.6165513707404561, "grad_norm": 0.408627986907959, "learning_rate": 3.5107176547199224e-05, "loss": 2.6876, "step": 7520 }, { "epoch": 0.6167153471688445, "grad_norm": 0.4065479636192322, "learning_rate": 3.508129630209901e-05, "loss": 2.7143, "step": 7522 }, { "epoch": 0.6168793235972329, "grad_norm": 0.38085782527923584, "learning_rate": 3.505542044380855e-05, "loss": 2.7079, "step": 7524 }, { "epoch": 0.6170433000256214, "grad_norm": 0.39370179176330566, "learning_rate": 3.5029548979936556e-05, "loss": 2.7574, "step": 7526 }, { "epoch": 0.6172072764540097, "grad_norm": 0.3861037790775299, "learning_rate": 3.5003681918090514e-05, "loss": 2.7286, "step": 7528 }, { "epoch": 0.6173712528823981, "grad_norm": 0.3807239830493927, "learning_rate": 3.497781926587657e-05, "loss": 2.7274, "step": 7530 }, { "epoch": 0.6175352293107865, "grad_norm": 0.419541597366333, "learning_rate": 3.495196103089953e-05, "loss": 2.7384, "step": 7532 }, { "epoch": 0.617699205739175, "grad_norm": 0.415090411901474, "learning_rate": 3.492610722076299e-05, "loss": 2.7689, "step": 7534 }, { "epoch": 0.6178631821675634, "grad_norm": 0.4102879762649536, "learning_rate": 3.49002578430692e-05, "loss": 2.717, "step": 7536 }, { "epoch": 0.6180271585959518, "grad_norm": 0.38376542925834656, "learning_rate": 3.487441290541909e-05, "loss": 2.7256, "step": 7538 }, { "epoch": 0.6181911350243402, "grad_norm": 0.3987720012664795, "learning_rate": 3.484857241541232e-05, "loss": 2.7632, "step": 7540 }, { "epoch": 0.6183551114527287, "grad_norm": 0.3795928657054901, "learning_rate": 3.4822736380647214e-05, "loss": 2.7228, "step": 7542 }, { "epoch": 0.6185190878811171, "grad_norm": 0.3663689196109772, "learning_rate": 3.479690480872079e-05, "loss": 2.7262, "step": 7544 }, { "epoch": 0.6186830643095055, "grad_norm": 0.40121135115623474, "learning_rate": 3.4771077707228784e-05, "loss": 2.7431, "step": 7546 }, { "epoch": 0.618847040737894, "grad_norm": 0.37954187393188477, "learning_rate": 3.474525508376558e-05, "loss": 2.7436, "step": 7548 }, { "epoch": 0.6190110171662824, "grad_norm": 0.4009801745414734, "learning_rate": 3.471943694592425e-05, "loss": 2.7057, "step": 7550 }, { "epoch": 0.6191749935946708, "grad_norm": 0.38822048902511597, "learning_rate": 3.4693623301296584e-05, "loss": 2.7042, "step": 7552 }, { "epoch": 0.6193389700230592, "grad_norm": 0.36688941717147827, "learning_rate": 3.466781415747301e-05, "loss": 2.7661, "step": 7554 }, { "epoch": 0.6195029464514477, "grad_norm": 0.3665444552898407, "learning_rate": 3.464200952204264e-05, "loss": 2.6979, "step": 7556 }, { "epoch": 0.619666922879836, "grad_norm": 0.378578245639801, "learning_rate": 3.461620940259327e-05, "loss": 2.7311, "step": 7558 }, { "epoch": 0.6198308993082244, "grad_norm": 0.37778717279434204, "learning_rate": 3.459041380671136e-05, "loss": 2.7344, "step": 7560 }, { "epoch": 0.6199948757366128, "grad_norm": 0.3607989549636841, "learning_rate": 3.456462274198205e-05, "loss": 2.7406, "step": 7562 }, { "epoch": 0.6201588521650013, "grad_norm": 0.39484044909477234, "learning_rate": 3.453883621598915e-05, "loss": 2.7612, "step": 7564 }, { "epoch": 0.6203228285933897, "grad_norm": 0.3921862542629242, "learning_rate": 3.4513054236315125e-05, "loss": 2.7216, "step": 7566 }, { "epoch": 0.6204868050217781, "grad_norm": 0.39838817715644836, "learning_rate": 3.448727681054107e-05, "loss": 2.7325, "step": 7568 }, { "epoch": 0.6206507814501665, "grad_norm": 0.4075007438659668, "learning_rate": 3.4461503946246835e-05, "loss": 2.73, "step": 7570 }, { "epoch": 0.620814757878555, "grad_norm": 0.3774442970752716, "learning_rate": 3.4435735651010836e-05, "loss": 2.7211, "step": 7572 }, { "epoch": 0.6209787343069434, "grad_norm": 0.3737916052341461, "learning_rate": 3.440997193241016e-05, "loss": 2.7158, "step": 7574 }, { "epoch": 0.6211427107353318, "grad_norm": 0.377712607383728, "learning_rate": 3.438421279802062e-05, "loss": 2.7244, "step": 7576 }, { "epoch": 0.6213066871637202, "grad_norm": 0.38349977135658264, "learning_rate": 3.4358458255416603e-05, "loss": 2.7893, "step": 7578 }, { "epoch": 0.6214706635921087, "grad_norm": 0.39397573471069336, "learning_rate": 3.433270831217116e-05, "loss": 2.6496, "step": 7580 }, { "epoch": 0.6216346400204971, "grad_norm": 0.40148624777793884, "learning_rate": 3.430696297585602e-05, "loss": 2.6999, "step": 7582 }, { "epoch": 0.6217986164488855, "grad_norm": 0.38889259099960327, "learning_rate": 3.4281222254041525e-05, "loss": 2.8026, "step": 7584 }, { "epoch": 0.6219625928772738, "grad_norm": 0.3647606670856476, "learning_rate": 3.425548615429666e-05, "loss": 2.7333, "step": 7586 }, { "epoch": 0.6221265693056623, "grad_norm": 0.3934035301208496, "learning_rate": 3.422975468418911e-05, "loss": 2.7353, "step": 7588 }, { "epoch": 0.6222905457340507, "grad_norm": 0.37733393907546997, "learning_rate": 3.4204027851285114e-05, "loss": 2.7875, "step": 7590 }, { "epoch": 0.6224545221624391, "grad_norm": 0.38836774230003357, "learning_rate": 3.417830566314959e-05, "loss": 2.6995, "step": 7592 }, { "epoch": 0.6226184985908275, "grad_norm": 0.40056899189949036, "learning_rate": 3.415258812734612e-05, "loss": 2.7682, "step": 7594 }, { "epoch": 0.622782475019216, "grad_norm": 0.3677942454814911, "learning_rate": 3.412687525143685e-05, "loss": 2.6626, "step": 7596 }, { "epoch": 0.6229464514476044, "grad_norm": 0.3932241201400757, "learning_rate": 3.410116704298259e-05, "loss": 2.7325, "step": 7598 }, { "epoch": 0.6231104278759928, "grad_norm": 0.39545226097106934, "learning_rate": 3.407546350954281e-05, "loss": 2.7212, "step": 7600 }, { "epoch": 0.6232744043043812, "grad_norm": 0.3952122628688812, "learning_rate": 3.4049764658675556e-05, "loss": 2.7139, "step": 7602 }, { "epoch": 0.6234383807327697, "grad_norm": 0.380193829536438, "learning_rate": 3.4024070497937496e-05, "loss": 2.6838, "step": 7604 }, { "epoch": 0.6236023571611581, "grad_norm": 0.3770444989204407, "learning_rate": 3.399838103488397e-05, "loss": 2.7072, "step": 7606 }, { "epoch": 0.6237663335895465, "grad_norm": 0.3546270430088043, "learning_rate": 3.3972696277068885e-05, "loss": 2.7474, "step": 7608 }, { "epoch": 0.623930310017935, "grad_norm": 0.3864762485027313, "learning_rate": 3.394701623204477e-05, "loss": 2.706, "step": 7610 }, { "epoch": 0.6240942864463234, "grad_norm": 0.3852376937866211, "learning_rate": 3.392134090736282e-05, "loss": 2.6709, "step": 7612 }, { "epoch": 0.6242582628747118, "grad_norm": 0.3920847773551941, "learning_rate": 3.389567031057278e-05, "loss": 2.7288, "step": 7614 }, { "epoch": 0.6244222393031001, "grad_norm": 0.39720067381858826, "learning_rate": 3.387000444922301e-05, "loss": 2.7414, "step": 7616 }, { "epoch": 0.6245862157314886, "grad_norm": 0.384258508682251, "learning_rate": 3.384434333086054e-05, "loss": 2.6749, "step": 7618 }, { "epoch": 0.624750192159877, "grad_norm": 0.37747853994369507, "learning_rate": 3.381868696303094e-05, "loss": 2.7169, "step": 7620 }, { "epoch": 0.6249141685882654, "grad_norm": 0.38757556676864624, "learning_rate": 3.379303535327838e-05, "loss": 2.7312, "step": 7622 }, { "epoch": 0.6250781450166538, "grad_norm": 0.3916012942790985, "learning_rate": 3.376738850914571e-05, "loss": 2.7481, "step": 7624 }, { "epoch": 0.6252421214450423, "grad_norm": 0.37376511096954346, "learning_rate": 3.374174643817428e-05, "loss": 2.711, "step": 7626 }, { "epoch": 0.6254060978734307, "grad_norm": 0.39975661039352417, "learning_rate": 3.3716109147904094e-05, "loss": 2.7318, "step": 7628 }, { "epoch": 0.6255700743018191, "grad_norm": 0.41789987683296204, "learning_rate": 3.369047664587375e-05, "loss": 2.7321, "step": 7630 }, { "epoch": 0.6257340507302075, "grad_norm": 0.39743414521217346, "learning_rate": 3.3664848939620406e-05, "loss": 2.7413, "step": 7632 }, { "epoch": 0.625898027158596, "grad_norm": 0.3793678879737854, "learning_rate": 3.363922603667982e-05, "loss": 2.6591, "step": 7634 }, { "epoch": 0.6260620035869844, "grad_norm": 0.3701624274253845, "learning_rate": 3.3613607944586374e-05, "loss": 2.7254, "step": 7636 }, { "epoch": 0.6262259800153728, "grad_norm": 0.3606198728084564, "learning_rate": 3.3587994670873e-05, "loss": 2.7231, "step": 7638 }, { "epoch": 0.6263899564437612, "grad_norm": 0.376678466796875, "learning_rate": 3.356238622307119e-05, "loss": 2.7302, "step": 7640 }, { "epoch": 0.6265539328721497, "grad_norm": 0.4293898046016693, "learning_rate": 3.3536782608711105e-05, "loss": 2.6861, "step": 7642 }, { "epoch": 0.6267179093005381, "grad_norm": 0.4140755236148834, "learning_rate": 3.3511183835321394e-05, "loss": 2.694, "step": 7644 }, { "epoch": 0.6268818857289264, "grad_norm": 0.41748470067977905, "learning_rate": 3.348558991042929e-05, "loss": 2.7329, "step": 7646 }, { "epoch": 0.6270458621573148, "grad_norm": 0.3913920819759369, "learning_rate": 3.346000084156069e-05, "loss": 2.7399, "step": 7648 }, { "epoch": 0.6272098385857033, "grad_norm": 0.38955986499786377, "learning_rate": 3.343441663623995e-05, "loss": 2.6881, "step": 7650 }, { "epoch": 0.6273738150140917, "grad_norm": 0.38670194149017334, "learning_rate": 3.340883730199006e-05, "loss": 2.712, "step": 7652 }, { "epoch": 0.6275377914424801, "grad_norm": 0.38310298323631287, "learning_rate": 3.338326284633257e-05, "loss": 2.7457, "step": 7654 }, { "epoch": 0.6277017678708685, "grad_norm": 0.3795510232448578, "learning_rate": 3.335769327678759e-05, "loss": 2.748, "step": 7656 }, { "epoch": 0.627865744299257, "grad_norm": 0.37910598516464233, "learning_rate": 3.333212860087375e-05, "loss": 2.751, "step": 7658 }, { "epoch": 0.6280297207276454, "grad_norm": 0.37041279673576355, "learning_rate": 3.330656882610835e-05, "loss": 2.7115, "step": 7660 }, { "epoch": 0.6281936971560338, "grad_norm": 0.37564611434936523, "learning_rate": 3.328101396000714e-05, "loss": 2.6771, "step": 7662 }, { "epoch": 0.6283576735844223, "grad_norm": 0.37373894453048706, "learning_rate": 3.325546401008446e-05, "loss": 2.7083, "step": 7664 }, { "epoch": 0.6285216500128107, "grad_norm": 0.41689133644104004, "learning_rate": 3.3229918983853245e-05, "loss": 2.7567, "step": 7666 }, { "epoch": 0.6286856264411991, "grad_norm": 0.3869902193546295, "learning_rate": 3.3204378888824926e-05, "loss": 2.7269, "step": 7668 }, { "epoch": 0.6288496028695875, "grad_norm": 0.3627869188785553, "learning_rate": 3.317884373250953e-05, "loss": 2.7304, "step": 7670 }, { "epoch": 0.629013579297976, "grad_norm": 0.372374027967453, "learning_rate": 3.315331352241559e-05, "loss": 2.6885, "step": 7672 }, { "epoch": 0.6291775557263644, "grad_norm": 0.40186235308647156, "learning_rate": 3.3127788266050195e-05, "loss": 2.7233, "step": 7674 }, { "epoch": 0.6293415321547527, "grad_norm": 0.4125675857067108, "learning_rate": 3.3102267970919023e-05, "loss": 2.7048, "step": 7676 }, { "epoch": 0.6295055085831411, "grad_norm": 0.3935837745666504, "learning_rate": 3.307675264452624e-05, "loss": 2.7096, "step": 7678 }, { "epoch": 0.6296694850115296, "grad_norm": 0.4198305904865265, "learning_rate": 3.305124229437453e-05, "loss": 2.7227, "step": 7680 }, { "epoch": 0.629833461439918, "grad_norm": 0.3819190561771393, "learning_rate": 3.302573692796522e-05, "loss": 2.735, "step": 7682 }, { "epoch": 0.6299974378683064, "grad_norm": 0.38034242391586304, "learning_rate": 3.300023655279806e-05, "loss": 2.7015, "step": 7684 }, { "epoch": 0.6301614142966948, "grad_norm": 0.37957313656806946, "learning_rate": 3.297474117637136e-05, "loss": 2.6697, "step": 7686 }, { "epoch": 0.6303253907250833, "grad_norm": 0.3742494285106659, "learning_rate": 3.294925080618202e-05, "loss": 2.7666, "step": 7688 }, { "epoch": 0.6304893671534717, "grad_norm": 0.3573929965496063, "learning_rate": 3.292376544972541e-05, "loss": 2.729, "step": 7690 }, { "epoch": 0.6306533435818601, "grad_norm": 0.370976060628891, "learning_rate": 3.289828511449541e-05, "loss": 2.7811, "step": 7692 }, { "epoch": 0.6308173200102485, "grad_norm": 0.40317147970199585, "learning_rate": 3.28728098079845e-05, "loss": 2.7098, "step": 7694 }, { "epoch": 0.630981296438637, "grad_norm": 0.36356374621391296, "learning_rate": 3.28473395376836e-05, "loss": 2.7561, "step": 7696 }, { "epoch": 0.6311452728670254, "grad_norm": 0.4060159921646118, "learning_rate": 3.282187431108216e-05, "loss": 2.727, "step": 7698 }, { "epoch": 0.6313092492954138, "grad_norm": 0.38442477583885193, "learning_rate": 3.279641413566823e-05, "loss": 2.7332, "step": 7700 }, { "epoch": 0.6314732257238022, "grad_norm": 0.3743780851364136, "learning_rate": 3.2770959018928296e-05, "loss": 2.6629, "step": 7702 }, { "epoch": 0.6316372021521907, "grad_norm": 0.3966967463493347, "learning_rate": 3.2745508968347325e-05, "loss": 2.7197, "step": 7704 }, { "epoch": 0.631801178580579, "grad_norm": 0.37655702233314514, "learning_rate": 3.2720063991408924e-05, "loss": 2.6881, "step": 7706 }, { "epoch": 0.6319651550089674, "grad_norm": 0.3916068375110626, "learning_rate": 3.2694624095595084e-05, "loss": 2.7066, "step": 7708 }, { "epoch": 0.6321291314373558, "grad_norm": 0.3936549425125122, "learning_rate": 3.2669189288386325e-05, "loss": 2.7219, "step": 7710 }, { "epoch": 0.6322931078657443, "grad_norm": 0.3706667423248291, "learning_rate": 3.2643759577261744e-05, "loss": 2.6886, "step": 7712 }, { "epoch": 0.6324570842941327, "grad_norm": 0.3641817271709442, "learning_rate": 3.261833496969886e-05, "loss": 2.7684, "step": 7714 }, { "epoch": 0.6326210607225211, "grad_norm": 0.3743795156478882, "learning_rate": 3.259291547317372e-05, "loss": 2.6686, "step": 7716 }, { "epoch": 0.6327850371509095, "grad_norm": 0.3651292324066162, "learning_rate": 3.256750109516087e-05, "loss": 2.7053, "step": 7718 }, { "epoch": 0.632949013579298, "grad_norm": 0.3830392062664032, "learning_rate": 3.254209184313336e-05, "loss": 2.7814, "step": 7720 }, { "epoch": 0.6331129900076864, "grad_norm": 0.3942374587059021, "learning_rate": 3.251668772456268e-05, "loss": 2.6528, "step": 7722 }, { "epoch": 0.6332769664360748, "grad_norm": 0.41045206785202026, "learning_rate": 3.2491288746918903e-05, "loss": 2.7269, "step": 7724 }, { "epoch": 0.6334409428644633, "grad_norm": 0.41689518094062805, "learning_rate": 3.246589491767051e-05, "loss": 2.6991, "step": 7726 }, { "epoch": 0.6336049192928517, "grad_norm": 0.39930638670921326, "learning_rate": 3.2440506244284484e-05, "loss": 2.7544, "step": 7728 }, { "epoch": 0.6337688957212401, "grad_norm": 0.42437562346458435, "learning_rate": 3.241512273422635e-05, "loss": 2.7116, "step": 7730 }, { "epoch": 0.6339328721496285, "grad_norm": 0.42422398924827576, "learning_rate": 3.238974439496003e-05, "loss": 2.6951, "step": 7732 }, { "epoch": 0.634096848578017, "grad_norm": 0.42158007621765137, "learning_rate": 3.2364371233947964e-05, "loss": 2.7489, "step": 7734 }, { "epoch": 0.6342608250064053, "grad_norm": 0.3969306945800781, "learning_rate": 3.2339003258651104e-05, "loss": 2.7346, "step": 7736 }, { "epoch": 0.6344248014347937, "grad_norm": 0.40741413831710815, "learning_rate": 3.231364047652882e-05, "loss": 2.7646, "step": 7738 }, { "epoch": 0.6345887778631821, "grad_norm": 0.3900740146636963, "learning_rate": 3.228828289503898e-05, "loss": 2.7345, "step": 7740 }, { "epoch": 0.6347527542915706, "grad_norm": 0.3706361651420593, "learning_rate": 3.226293052163792e-05, "loss": 2.6459, "step": 7742 }, { "epoch": 0.634916730719959, "grad_norm": 0.38874951004981995, "learning_rate": 3.223758336378046e-05, "loss": 2.6441, "step": 7744 }, { "epoch": 0.6350807071483474, "grad_norm": 0.40067705512046814, "learning_rate": 3.2212241428919834e-05, "loss": 2.7441, "step": 7746 }, { "epoch": 0.6352446835767358, "grad_norm": 0.3729390501976013, "learning_rate": 3.2186904724507835e-05, "loss": 2.6632, "step": 7748 }, { "epoch": 0.6354086600051243, "grad_norm": 0.3921566605567932, "learning_rate": 3.216157325799463e-05, "loss": 2.7508, "step": 7750 }, { "epoch": 0.6355726364335127, "grad_norm": 0.39209845662117004, "learning_rate": 3.213624703682885e-05, "loss": 2.7631, "step": 7752 }, { "epoch": 0.6357366128619011, "grad_norm": 0.41270387172698975, "learning_rate": 3.211092606845766e-05, "loss": 2.6897, "step": 7754 }, { "epoch": 0.6359005892902895, "grad_norm": 0.4053170084953308, "learning_rate": 3.2085610360326614e-05, "loss": 2.6658, "step": 7756 }, { "epoch": 0.636064565718678, "grad_norm": 0.38600656390190125, "learning_rate": 3.206029991987972e-05, "loss": 2.7244, "step": 7758 }, { "epoch": 0.6362285421470664, "grad_norm": 0.4255266785621643, "learning_rate": 3.203499475455948e-05, "loss": 2.6988, "step": 7760 }, { "epoch": 0.6363925185754548, "grad_norm": 0.3940472900867462, "learning_rate": 3.200969487180681e-05, "loss": 2.7547, "step": 7762 }, { "epoch": 0.6365564950038431, "grad_norm": 0.4074536859989166, "learning_rate": 3.1984400279061054e-05, "loss": 2.737, "step": 7764 }, { "epoch": 0.6367204714322316, "grad_norm": 0.3944758176803589, "learning_rate": 3.195911098376007e-05, "loss": 2.7087, "step": 7766 }, { "epoch": 0.63688444786062, "grad_norm": 0.4005754888057709, "learning_rate": 3.19338269933401e-05, "loss": 2.738, "step": 7768 }, { "epoch": 0.6370484242890084, "grad_norm": 0.3666488230228424, "learning_rate": 3.1908548315235806e-05, "loss": 2.6789, "step": 7770 }, { "epoch": 0.6372124007173968, "grad_norm": 0.3705459237098694, "learning_rate": 3.1883274956880384e-05, "loss": 2.6495, "step": 7772 }, { "epoch": 0.6373763771457853, "grad_norm": 0.37399980425834656, "learning_rate": 3.1858006925705376e-05, "loss": 2.699, "step": 7774 }, { "epoch": 0.6375403535741737, "grad_norm": 0.3848254382610321, "learning_rate": 3.183274422914077e-05, "loss": 2.6965, "step": 7776 }, { "epoch": 0.6377043300025621, "grad_norm": 0.3763097822666168, "learning_rate": 3.1807486874615035e-05, "loss": 2.7206, "step": 7778 }, { "epoch": 0.6378683064309506, "grad_norm": 0.3757327198982239, "learning_rate": 3.1782234869555027e-05, "loss": 2.6724, "step": 7780 }, { "epoch": 0.638032282859339, "grad_norm": 0.3919960856437683, "learning_rate": 3.175698822138602e-05, "loss": 2.6877, "step": 7782 }, { "epoch": 0.6381962592877274, "grad_norm": 0.37129318714141846, "learning_rate": 3.173174693753175e-05, "loss": 2.6874, "step": 7784 }, { "epoch": 0.6383602357161158, "grad_norm": 0.4006279408931732, "learning_rate": 3.1706511025414365e-05, "loss": 2.6892, "step": 7786 }, { "epoch": 0.6385242121445043, "grad_norm": 0.37489134073257446, "learning_rate": 3.16812804924544e-05, "loss": 2.7256, "step": 7788 }, { "epoch": 0.6386881885728927, "grad_norm": 0.3848879933357239, "learning_rate": 3.1656055346070856e-05, "loss": 2.7451, "step": 7790 }, { "epoch": 0.6388521650012811, "grad_norm": 0.38539236783981323, "learning_rate": 3.163083559368113e-05, "loss": 2.7099, "step": 7792 }, { "epoch": 0.6390161414296694, "grad_norm": 0.3892047703266144, "learning_rate": 3.1605621242700986e-05, "loss": 2.7378, "step": 7794 }, { "epoch": 0.639180117858058, "grad_norm": 0.38269326090812683, "learning_rate": 3.158041230054471e-05, "loss": 2.7244, "step": 7796 }, { "epoch": 0.6393440942864463, "grad_norm": 0.43198147416114807, "learning_rate": 3.15552087746249e-05, "loss": 2.7968, "step": 7798 }, { "epoch": 0.6395080707148347, "grad_norm": 0.4099862277507782, "learning_rate": 3.1530010672352573e-05, "loss": 2.6968, "step": 7800 }, { "epoch": 0.6396720471432231, "grad_norm": 0.388921320438385, "learning_rate": 3.150481800113721e-05, "loss": 2.6984, "step": 7802 }, { "epoch": 0.6398360235716116, "grad_norm": 0.37909209728240967, "learning_rate": 3.147963076838665e-05, "loss": 2.7149, "step": 7804 }, { "epoch": 0.64, "grad_norm": 0.4047726094722748, "learning_rate": 3.1454448981507105e-05, "loss": 2.7068, "step": 7806 }, { "epoch": 0.6401639764283884, "grad_norm": 0.3683038353919983, "learning_rate": 3.142927264790327e-05, "loss": 2.7194, "step": 7808 }, { "epoch": 0.6403279528567768, "grad_norm": 0.3726213872432709, "learning_rate": 3.140410177497815e-05, "loss": 2.7206, "step": 7810 }, { "epoch": 0.6404919292851653, "grad_norm": 0.41055673360824585, "learning_rate": 3.137893637013318e-05, "loss": 2.6806, "step": 7812 }, { "epoch": 0.6406559057135537, "grad_norm": 0.42934727668762207, "learning_rate": 3.135377644076822e-05, "loss": 2.7264, "step": 7814 }, { "epoch": 0.6408198821419421, "grad_norm": 0.3882001042366028, "learning_rate": 3.132862199428147e-05, "loss": 2.6832, "step": 7816 }, { "epoch": 0.6409838585703305, "grad_norm": 0.3750775456428528, "learning_rate": 3.130347303806952e-05, "loss": 2.7212, "step": 7818 }, { "epoch": 0.641147834998719, "grad_norm": 0.36462724208831787, "learning_rate": 3.127832957952739e-05, "loss": 2.7495, "step": 7820 }, { "epoch": 0.6413118114271074, "grad_norm": 0.37588170170783997, "learning_rate": 3.125319162604844e-05, "loss": 2.6972, "step": 7822 }, { "epoch": 0.6414757878554957, "grad_norm": 0.3652655780315399, "learning_rate": 3.1228059185024415e-05, "loss": 2.7406, "step": 7824 }, { "epoch": 0.6416397642838841, "grad_norm": 0.3910474181175232, "learning_rate": 3.120293226384548e-05, "loss": 2.712, "step": 7826 }, { "epoch": 0.6418037407122726, "grad_norm": 0.35795921087265015, "learning_rate": 3.1177810869900125e-05, "loss": 2.7015, "step": 7828 }, { "epoch": 0.641967717140661, "grad_norm": 0.385695219039917, "learning_rate": 3.115269501057523e-05, "loss": 2.6667, "step": 7830 }, { "epoch": 0.6421316935690494, "grad_norm": 0.3897539973258972, "learning_rate": 3.112758469325609e-05, "loss": 2.7592, "step": 7832 }, { "epoch": 0.6422956699974379, "grad_norm": 0.3896655738353729, "learning_rate": 3.11024799253263e-05, "loss": 2.646, "step": 7834 }, { "epoch": 0.6424596464258263, "grad_norm": 0.36518627405166626, "learning_rate": 3.107738071416785e-05, "loss": 2.7515, "step": 7836 }, { "epoch": 0.6426236228542147, "grad_norm": 0.3937589228153229, "learning_rate": 3.1052287067161146e-05, "loss": 2.7264, "step": 7838 }, { "epoch": 0.6427875992826031, "grad_norm": 0.38585948944091797, "learning_rate": 3.102719899168489e-05, "loss": 2.6811, "step": 7840 }, { "epoch": 0.6429515757109916, "grad_norm": 0.427311509847641, "learning_rate": 3.100211649511615e-05, "loss": 2.7838, "step": 7842 }, { "epoch": 0.64311555213938, "grad_norm": 0.4227888882160187, "learning_rate": 3.097703958483043e-05, "loss": 2.6755, "step": 7844 }, { "epoch": 0.6432795285677684, "grad_norm": 0.3813656270503998, "learning_rate": 3.09519682682015e-05, "loss": 2.7564, "step": 7846 }, { "epoch": 0.6434435049961568, "grad_norm": 0.3923702836036682, "learning_rate": 3.092690255260151e-05, "loss": 2.7213, "step": 7848 }, { "epoch": 0.6436074814245453, "grad_norm": 0.3898935317993164, "learning_rate": 3.090184244540101e-05, "loss": 2.7167, "step": 7850 }, { "epoch": 0.6437714578529337, "grad_norm": 0.4096493124961853, "learning_rate": 3.087678795396886e-05, "loss": 2.7552, "step": 7852 }, { "epoch": 0.643935434281322, "grad_norm": 0.3937073349952698, "learning_rate": 3.085173908567224e-05, "loss": 2.7003, "step": 7854 }, { "epoch": 0.6440994107097104, "grad_norm": 0.37011292576789856, "learning_rate": 3.082669584787674e-05, "loss": 2.7193, "step": 7856 }, { "epoch": 0.6442633871380989, "grad_norm": 0.38424769043922424, "learning_rate": 3.080165824794627e-05, "loss": 2.6545, "step": 7858 }, { "epoch": 0.6444273635664873, "grad_norm": 0.3760468363761902, "learning_rate": 3.077662629324304e-05, "loss": 2.6959, "step": 7860 }, { "epoch": 0.6445913399948757, "grad_norm": 0.3764878511428833, "learning_rate": 3.075159999112769e-05, "loss": 2.6651, "step": 7862 }, { "epoch": 0.6447553164232641, "grad_norm": 0.3846738934516907, "learning_rate": 3.07265793489591e-05, "loss": 2.6702, "step": 7864 }, { "epoch": 0.6449192928516526, "grad_norm": 0.3727152347564697, "learning_rate": 3.0701564374094546e-05, "loss": 2.681, "step": 7866 }, { "epoch": 0.645083269280041, "grad_norm": 0.3666906952857971, "learning_rate": 3.067655507388965e-05, "loss": 2.7018, "step": 7868 }, { "epoch": 0.6452472457084294, "grad_norm": 0.37696129083633423, "learning_rate": 3.0651551455698314e-05, "loss": 2.7595, "step": 7870 }, { "epoch": 0.6454112221368178, "grad_norm": 0.4079241454601288, "learning_rate": 3.062655352687276e-05, "loss": 2.7567, "step": 7872 }, { "epoch": 0.6455751985652063, "grad_norm": 0.39961734414100647, "learning_rate": 3.060156129476364e-05, "loss": 2.7478, "step": 7874 }, { "epoch": 0.6457391749935947, "grad_norm": 0.3890273869037628, "learning_rate": 3.0576574766719814e-05, "loss": 2.7414, "step": 7876 }, { "epoch": 0.6459031514219831, "grad_norm": 0.3946554660797119, "learning_rate": 3.055159395008851e-05, "loss": 2.7703, "step": 7878 }, { "epoch": 0.6460671278503715, "grad_norm": 0.4062712490558624, "learning_rate": 3.052661885221531e-05, "loss": 2.7585, "step": 7880 }, { "epoch": 0.64623110427876, "grad_norm": 0.3980555832386017, "learning_rate": 3.050164948044406e-05, "loss": 2.6951, "step": 7882 }, { "epoch": 0.6463950807071484, "grad_norm": 0.4146850109100342, "learning_rate": 3.0476685842116932e-05, "loss": 2.731, "step": 7884 }, { "epoch": 0.6465590571355367, "grad_norm": 0.4119962453842163, "learning_rate": 3.045172794457446e-05, "loss": 2.6566, "step": 7886 }, { "epoch": 0.6467230335639251, "grad_norm": 0.40596380829811096, "learning_rate": 3.042677579515544e-05, "loss": 2.7325, "step": 7888 }, { "epoch": 0.6468870099923136, "grad_norm": 0.37548547983169556, "learning_rate": 3.0401829401196963e-05, "loss": 2.6471, "step": 7890 }, { "epoch": 0.647050986420702, "grad_norm": 0.3889716565608978, "learning_rate": 3.0376888770034506e-05, "loss": 2.6794, "step": 7892 }, { "epoch": 0.6472149628490904, "grad_norm": 0.4179765284061432, "learning_rate": 3.0351953909001784e-05, "loss": 2.6474, "step": 7894 }, { "epoch": 0.6473789392774789, "grad_norm": 0.39606013894081116, "learning_rate": 3.0327024825430827e-05, "loss": 2.7276, "step": 7896 }, { "epoch": 0.6475429157058673, "grad_norm": 0.36344513297080994, "learning_rate": 3.0302101526651973e-05, "loss": 2.7069, "step": 7898 }, { "epoch": 0.6477068921342557, "grad_norm": 0.364402711391449, "learning_rate": 3.0277184019993876e-05, "loss": 2.622, "step": 7900 }, { "epoch": 0.6478708685626441, "grad_norm": 0.37299656867980957, "learning_rate": 3.0252272312783448e-05, "loss": 2.7014, "step": 7902 }, { "epoch": 0.6480348449910326, "grad_norm": 0.3982824385166168, "learning_rate": 3.0227366412345937e-05, "loss": 2.7368, "step": 7904 }, { "epoch": 0.648198821419421, "grad_norm": 0.4151482582092285, "learning_rate": 3.0202466326004863e-05, "loss": 2.7334, "step": 7906 }, { "epoch": 0.6483627978478094, "grad_norm": 0.40278491377830505, "learning_rate": 3.0177572061082003e-05, "loss": 2.6538, "step": 7908 }, { "epoch": 0.6485267742761978, "grad_norm": 0.3986111581325531, "learning_rate": 3.0152683624897514e-05, "loss": 2.6906, "step": 7910 }, { "epoch": 0.6486907507045863, "grad_norm": 0.3914695084095001, "learning_rate": 3.0127801024769746e-05, "loss": 2.695, "step": 7912 }, { "epoch": 0.6488547271329747, "grad_norm": 0.39455148577690125, "learning_rate": 3.0102924268015365e-05, "loss": 2.7245, "step": 7914 }, { "epoch": 0.649018703561363, "grad_norm": 0.3723244369029999, "learning_rate": 3.0078053361949342e-05, "loss": 2.7177, "step": 7916 }, { "epoch": 0.6491826799897514, "grad_norm": 0.4205113351345062, "learning_rate": 3.005318831388491e-05, "loss": 2.71, "step": 7918 }, { "epoch": 0.6493466564181399, "grad_norm": 0.373691588640213, "learning_rate": 3.002832913113356e-05, "loss": 2.7145, "step": 7920 }, { "epoch": 0.6495106328465283, "grad_norm": 0.3899837136268616, "learning_rate": 3.0003475821005078e-05, "loss": 2.7439, "step": 7922 }, { "epoch": 0.6496746092749167, "grad_norm": 0.4084588587284088, "learning_rate": 2.997862839080755e-05, "loss": 2.7286, "step": 7924 }, { "epoch": 0.6498385857033051, "grad_norm": 0.3797529935836792, "learning_rate": 2.9953786847847264e-05, "loss": 2.6866, "step": 7926 }, { "epoch": 0.6500025621316936, "grad_norm": 0.417545884847641, "learning_rate": 2.9928951199428846e-05, "loss": 2.7426, "step": 7928 }, { "epoch": 0.650166538560082, "grad_norm": 0.37970849871635437, "learning_rate": 2.9904121452855155e-05, "loss": 2.6708, "step": 7930 }, { "epoch": 0.6503305149884704, "grad_norm": 0.3815860450267792, "learning_rate": 2.9879297615427294e-05, "loss": 2.7971, "step": 7932 }, { "epoch": 0.6504944914168588, "grad_norm": 0.37229830026626587, "learning_rate": 2.985447969444469e-05, "loss": 2.7025, "step": 7934 }, { "epoch": 0.6506584678452473, "grad_norm": 0.35849589109420776, "learning_rate": 2.982966769720499e-05, "loss": 2.7276, "step": 7936 }, { "epoch": 0.6508224442736357, "grad_norm": 0.3986893892288208, "learning_rate": 2.9804861631004065e-05, "loss": 2.7355, "step": 7938 }, { "epoch": 0.6509864207020241, "grad_norm": 0.40123265981674194, "learning_rate": 2.9780061503136124e-05, "loss": 2.6626, "step": 7940 }, { "epoch": 0.6511503971304125, "grad_norm": 0.3948691487312317, "learning_rate": 2.9755267320893576e-05, "loss": 2.693, "step": 7942 }, { "epoch": 0.651314373558801, "grad_norm": 0.36041921377182007, "learning_rate": 2.9730479091567082e-05, "loss": 2.7281, "step": 7944 }, { "epoch": 0.6514783499871893, "grad_norm": 0.4022723138332367, "learning_rate": 2.970569682244557e-05, "loss": 2.6919, "step": 7946 }, { "epoch": 0.6516423264155777, "grad_norm": 0.40095844864845276, "learning_rate": 2.9680920520816202e-05, "loss": 2.7113, "step": 7948 }, { "epoch": 0.6518063028439662, "grad_norm": 0.3731570541858673, "learning_rate": 2.9656150193964384e-05, "loss": 2.701, "step": 7950 }, { "epoch": 0.6519702792723546, "grad_norm": 0.37078115344047546, "learning_rate": 2.9631385849173797e-05, "loss": 2.7549, "step": 7952 }, { "epoch": 0.652134255700743, "grad_norm": 0.3733912706375122, "learning_rate": 2.960662749372633e-05, "loss": 2.7065, "step": 7954 }, { "epoch": 0.6522982321291314, "grad_norm": 0.38202810287475586, "learning_rate": 2.9581875134902093e-05, "loss": 2.7198, "step": 7956 }, { "epoch": 0.6524622085575199, "grad_norm": 0.4054269790649414, "learning_rate": 2.9557128779979493e-05, "loss": 2.6795, "step": 7958 }, { "epoch": 0.6526261849859083, "grad_norm": 0.3874515891075134, "learning_rate": 2.9532388436235127e-05, "loss": 2.6986, "step": 7960 }, { "epoch": 0.6527901614142967, "grad_norm": 0.35889798402786255, "learning_rate": 2.9507654110943813e-05, "loss": 2.7503, "step": 7962 }, { "epoch": 0.6529541378426851, "grad_norm": 0.38609278202056885, "learning_rate": 2.9482925811378658e-05, "loss": 2.7342, "step": 7964 }, { "epoch": 0.6531181142710736, "grad_norm": 0.38416317105293274, "learning_rate": 2.9458203544810935e-05, "loss": 2.6899, "step": 7966 }, { "epoch": 0.653282090699462, "grad_norm": 0.3454633057117462, "learning_rate": 2.943348731851018e-05, "loss": 2.708, "step": 7968 }, { "epoch": 0.6534460671278504, "grad_norm": 0.3578120768070221, "learning_rate": 2.940877713974414e-05, "loss": 2.7144, "step": 7970 }, { "epoch": 0.6536100435562388, "grad_norm": 0.351194828748703, "learning_rate": 2.938407301577878e-05, "loss": 2.6772, "step": 7972 }, { "epoch": 0.6537740199846273, "grad_norm": 0.37222763895988464, "learning_rate": 2.935937495387827e-05, "loss": 2.7184, "step": 7974 }, { "epoch": 0.6539379964130156, "grad_norm": 0.35520943999290466, "learning_rate": 2.933468296130507e-05, "loss": 2.7234, "step": 7976 }, { "epoch": 0.654101972841404, "grad_norm": 0.37274038791656494, "learning_rate": 2.930999704531976e-05, "loss": 2.7176, "step": 7978 }, { "epoch": 0.6542659492697924, "grad_norm": 0.36820128560066223, "learning_rate": 2.9285317213181173e-05, "loss": 2.6575, "step": 7980 }, { "epoch": 0.6544299256981809, "grad_norm": 0.4004935622215271, "learning_rate": 2.9260643472146387e-05, "loss": 2.7858, "step": 7982 }, { "epoch": 0.6545939021265693, "grad_norm": 0.38279664516448975, "learning_rate": 2.923597582947064e-05, "loss": 2.7268, "step": 7984 }, { "epoch": 0.6547578785549577, "grad_norm": 0.3946802020072937, "learning_rate": 2.9211314292407372e-05, "loss": 2.7099, "step": 7986 }, { "epoch": 0.6549218549833461, "grad_norm": 0.38687431812286377, "learning_rate": 2.9186658868208305e-05, "loss": 2.6559, "step": 7988 }, { "epoch": 0.6550858314117346, "grad_norm": 0.40961745381355286, "learning_rate": 2.916200956412325e-05, "loss": 2.6808, "step": 7990 }, { "epoch": 0.655249807840123, "grad_norm": 0.3674470782279968, "learning_rate": 2.9137366387400307e-05, "loss": 2.7769, "step": 7992 }, { "epoch": 0.6554137842685114, "grad_norm": 0.40013325214385986, "learning_rate": 2.911272934528574e-05, "loss": 2.6931, "step": 7994 }, { "epoch": 0.6555777606968998, "grad_norm": 0.3776344656944275, "learning_rate": 2.908809844502403e-05, "loss": 2.679, "step": 7996 }, { "epoch": 0.6557417371252883, "grad_norm": 0.3463280200958252, "learning_rate": 2.906347369385778e-05, "loss": 2.6805, "step": 7998 }, { "epoch": 0.6559057135536767, "grad_norm": 0.36267954111099243, "learning_rate": 2.9038855099027917e-05, "loss": 2.6864, "step": 8000 }, { "epoch": 0.656069689982065, "grad_norm": 0.36124834418296814, "learning_rate": 2.901424266777343e-05, "loss": 2.7348, "step": 8002 }, { "epoch": 0.6562336664104534, "grad_norm": 0.35316163301467896, "learning_rate": 2.8989636407331554e-05, "loss": 2.6707, "step": 8004 }, { "epoch": 0.6563976428388419, "grad_norm": 0.3646197021007538, "learning_rate": 2.8965036324937722e-05, "loss": 2.7284, "step": 8006 }, { "epoch": 0.6565616192672303, "grad_norm": 0.3676811158657074, "learning_rate": 2.8940442427825503e-05, "loss": 2.7328, "step": 8008 }, { "epoch": 0.6567255956956187, "grad_norm": 0.3920822739601135, "learning_rate": 2.8915854723226688e-05, "loss": 2.7163, "step": 8010 }, { "epoch": 0.6568895721240072, "grad_norm": 0.40281441807746887, "learning_rate": 2.8891273218371228e-05, "loss": 2.6922, "step": 8012 }, { "epoch": 0.6570535485523956, "grad_norm": 0.4011118710041046, "learning_rate": 2.8866697920487283e-05, "loss": 2.7137, "step": 8014 }, { "epoch": 0.657217524980784, "grad_norm": 0.4061340391635895, "learning_rate": 2.8842128836801097e-05, "loss": 2.6876, "step": 8016 }, { "epoch": 0.6573815014091724, "grad_norm": 0.39007076621055603, "learning_rate": 2.881756597453724e-05, "loss": 2.6738, "step": 8018 }, { "epoch": 0.6575454778375609, "grad_norm": 0.418149471282959, "learning_rate": 2.8793009340918298e-05, "loss": 2.724, "step": 8020 }, { "epoch": 0.6577094542659493, "grad_norm": 0.35326430201530457, "learning_rate": 2.876845894316511e-05, "loss": 2.6498, "step": 8022 }, { "epoch": 0.6578734306943377, "grad_norm": 0.41558533906936646, "learning_rate": 2.874391478849666e-05, "loss": 2.7526, "step": 8024 }, { "epoch": 0.6580374071227261, "grad_norm": 0.4501861333847046, "learning_rate": 2.8719376884130133e-05, "loss": 2.6614, "step": 8026 }, { "epoch": 0.6582013835511146, "grad_norm": 0.4039250910282135, "learning_rate": 2.8694845237280788e-05, "loss": 2.7159, "step": 8028 }, { "epoch": 0.658365359979503, "grad_norm": 0.4324546754360199, "learning_rate": 2.867031985516212e-05, "loss": 2.7088, "step": 8030 }, { "epoch": 0.6585293364078914, "grad_norm": 0.43278759717941284, "learning_rate": 2.8645800744985792e-05, "loss": 2.6581, "step": 8032 }, { "epoch": 0.6586933128362797, "grad_norm": 0.37865668535232544, "learning_rate": 2.8621287913961514e-05, "loss": 2.7509, "step": 8034 }, { "epoch": 0.6588572892646682, "grad_norm": 0.38785696029663086, "learning_rate": 2.859678136929731e-05, "loss": 2.7084, "step": 8036 }, { "epoch": 0.6590212656930566, "grad_norm": 0.3775040805339813, "learning_rate": 2.8572281118199225e-05, "loss": 2.7025, "step": 8038 }, { "epoch": 0.659185242121445, "grad_norm": 0.3656921684741974, "learning_rate": 2.854778716787151e-05, "loss": 2.6934, "step": 8040 }, { "epoch": 0.6593492185498334, "grad_norm": 0.4260038733482361, "learning_rate": 2.8523299525516546e-05, "loss": 2.6636, "step": 8042 }, { "epoch": 0.6595131949782219, "grad_norm": 0.38450589776039124, "learning_rate": 2.8498818198334897e-05, "loss": 2.6663, "step": 8044 }, { "epoch": 0.6596771714066103, "grad_norm": 0.370313823223114, "learning_rate": 2.8474343193525206e-05, "loss": 2.6922, "step": 8046 }, { "epoch": 0.6598411478349987, "grad_norm": 0.39956822991371155, "learning_rate": 2.8449874518284298e-05, "loss": 2.6659, "step": 8048 }, { "epoch": 0.6600051242633871, "grad_norm": 0.4072279930114746, "learning_rate": 2.842541217980713e-05, "loss": 2.7305, "step": 8050 }, { "epoch": 0.6601691006917756, "grad_norm": 0.3876146376132965, "learning_rate": 2.8400956185286805e-05, "loss": 2.7112, "step": 8052 }, { "epoch": 0.660333077120164, "grad_norm": 0.3958224058151245, "learning_rate": 2.837650654191456e-05, "loss": 2.7004, "step": 8054 }, { "epoch": 0.6604970535485524, "grad_norm": 0.4068087339401245, "learning_rate": 2.8352063256879724e-05, "loss": 2.7071, "step": 8056 }, { "epoch": 0.6606610299769408, "grad_norm": 0.35557734966278076, "learning_rate": 2.83276263373698e-05, "loss": 2.679, "step": 8058 }, { "epoch": 0.6608250064053293, "grad_norm": 0.36318376660346985, "learning_rate": 2.8303195790570415e-05, "loss": 2.6958, "step": 8060 }, { "epoch": 0.6609889828337177, "grad_norm": 0.3749483525753021, "learning_rate": 2.8278771623665323e-05, "loss": 2.7769, "step": 8062 }, { "epoch": 0.661152959262106, "grad_norm": 0.3773966431617737, "learning_rate": 2.825435384383634e-05, "loss": 2.7026, "step": 8064 }, { "epoch": 0.6613169356904945, "grad_norm": 0.3807618021965027, "learning_rate": 2.8229942458263538e-05, "loss": 2.7566, "step": 8066 }, { "epoch": 0.6614809121188829, "grad_norm": 0.38911864161491394, "learning_rate": 2.8205537474124965e-05, "loss": 2.747, "step": 8068 }, { "epoch": 0.6616448885472713, "grad_norm": 0.36372971534729004, "learning_rate": 2.818113889859687e-05, "loss": 2.6829, "step": 8070 }, { "epoch": 0.6618088649756597, "grad_norm": 0.3598827123641968, "learning_rate": 2.8156746738853613e-05, "loss": 2.7073, "step": 8072 }, { "epoch": 0.6619728414040482, "grad_norm": 0.36627793312072754, "learning_rate": 2.8132361002067613e-05, "loss": 2.6839, "step": 8074 }, { "epoch": 0.6621368178324366, "grad_norm": 0.36478564143180847, "learning_rate": 2.810798169540947e-05, "loss": 2.6743, "step": 8076 }, { "epoch": 0.662300794260825, "grad_norm": 0.34423142671585083, "learning_rate": 2.808360882604784e-05, "loss": 2.6963, "step": 8078 }, { "epoch": 0.6624647706892134, "grad_norm": 0.3476889431476593, "learning_rate": 2.8059242401149544e-05, "loss": 2.6932, "step": 8080 }, { "epoch": 0.6626287471176019, "grad_norm": 0.3661099374294281, "learning_rate": 2.803488242787941e-05, "loss": 2.7451, "step": 8082 }, { "epoch": 0.6627927235459903, "grad_norm": 0.37179726362228394, "learning_rate": 2.801052891340051e-05, "loss": 2.6449, "step": 8084 }, { "epoch": 0.6629566999743787, "grad_norm": 0.3675248324871063, "learning_rate": 2.798618186487387e-05, "loss": 2.7142, "step": 8086 }, { "epoch": 0.6631206764027671, "grad_norm": 0.3806789815425873, "learning_rate": 2.7961841289458713e-05, "loss": 2.6497, "step": 8088 }, { "epoch": 0.6632846528311556, "grad_norm": 0.3967260420322418, "learning_rate": 2.7937507194312323e-05, "loss": 2.7152, "step": 8090 }, { "epoch": 0.663448629259544, "grad_norm": 0.3586444556713104, "learning_rate": 2.7913179586590104e-05, "loss": 2.7196, "step": 8092 }, { "epoch": 0.6636126056879323, "grad_norm": 0.3600021302700043, "learning_rate": 2.788885847344549e-05, "loss": 2.6905, "step": 8094 }, { "epoch": 0.6637765821163207, "grad_norm": 0.40098971128463745, "learning_rate": 2.7864543862030074e-05, "loss": 2.7104, "step": 8096 }, { "epoch": 0.6639405585447092, "grad_norm": 0.4165302813053131, "learning_rate": 2.7840235759493506e-05, "loss": 2.7167, "step": 8098 }, { "epoch": 0.6641045349730976, "grad_norm": 0.39745283126831055, "learning_rate": 2.781593417298352e-05, "loss": 2.7079, "step": 8100 }, { "epoch": 0.664268511401486, "grad_norm": 0.36143335700035095, "learning_rate": 2.7791639109645972e-05, "loss": 2.764, "step": 8102 }, { "epoch": 0.6644324878298744, "grad_norm": 0.36410221457481384, "learning_rate": 2.7767350576624717e-05, "loss": 2.6782, "step": 8104 }, { "epoch": 0.6645964642582629, "grad_norm": 0.40479522943496704, "learning_rate": 2.774306858106177e-05, "loss": 2.7308, "step": 8106 }, { "epoch": 0.6647604406866513, "grad_norm": 0.3978186845779419, "learning_rate": 2.7718793130097187e-05, "loss": 2.6905, "step": 8108 }, { "epoch": 0.6649244171150397, "grad_norm": 0.39073604345321655, "learning_rate": 2.769452423086914e-05, "loss": 2.7269, "step": 8110 }, { "epoch": 0.6650883935434281, "grad_norm": 0.34713536500930786, "learning_rate": 2.7670261890513773e-05, "loss": 2.6669, "step": 8112 }, { "epoch": 0.6652523699718166, "grad_norm": 0.37141019105911255, "learning_rate": 2.764600611616544e-05, "loss": 2.7137, "step": 8114 }, { "epoch": 0.665416346400205, "grad_norm": 0.39449837803840637, "learning_rate": 2.7621756914956455e-05, "loss": 2.7047, "step": 8116 }, { "epoch": 0.6655803228285934, "grad_norm": 0.3795781433582306, "learning_rate": 2.7597514294017245e-05, "loss": 2.6987, "step": 8118 }, { "epoch": 0.6657442992569819, "grad_norm": 0.3567329943180084, "learning_rate": 2.757327826047632e-05, "loss": 2.7415, "step": 8120 }, { "epoch": 0.6659082756853703, "grad_norm": 0.3718640208244324, "learning_rate": 2.7549048821460187e-05, "loss": 2.6568, "step": 8122 }, { "epoch": 0.6660722521137586, "grad_norm": 0.378045916557312, "learning_rate": 2.7524825984093472e-05, "loss": 2.6979, "step": 8124 }, { "epoch": 0.666236228542147, "grad_norm": 0.35238373279571533, "learning_rate": 2.7500609755498857e-05, "loss": 2.7156, "step": 8126 }, { "epoch": 0.6664002049705355, "grad_norm": 0.39142218232154846, "learning_rate": 2.7476400142797077e-05, "loss": 2.7348, "step": 8128 }, { "epoch": 0.6665641813989239, "grad_norm": 0.38561588525772095, "learning_rate": 2.745219715310685e-05, "loss": 2.7155, "step": 8130 }, { "epoch": 0.6667281578273123, "grad_norm": 0.3654249906539917, "learning_rate": 2.742800079354509e-05, "loss": 2.776, "step": 8132 }, { "epoch": 0.6668921342557007, "grad_norm": 0.36338138580322266, "learning_rate": 2.740381107122662e-05, "loss": 2.6734, "step": 8134 }, { "epoch": 0.6670561106840892, "grad_norm": 0.38027507066726685, "learning_rate": 2.7379627993264395e-05, "loss": 2.6938, "step": 8136 }, { "epoch": 0.6672200871124776, "grad_norm": 0.38524648547172546, "learning_rate": 2.7355451566769385e-05, "loss": 2.7491, "step": 8138 }, { "epoch": 0.667384063540866, "grad_norm": 0.37056267261505127, "learning_rate": 2.733128179885064e-05, "loss": 2.7196, "step": 8140 }, { "epoch": 0.6675480399692544, "grad_norm": 0.37558913230895996, "learning_rate": 2.7307118696615174e-05, "loss": 2.7008, "step": 8142 }, { "epoch": 0.6677120163976429, "grad_norm": 0.3649221658706665, "learning_rate": 2.7282962267168122e-05, "loss": 2.7059, "step": 8144 }, { "epoch": 0.6678759928260313, "grad_norm": 0.37081843614578247, "learning_rate": 2.7258812517612632e-05, "loss": 2.6639, "step": 8146 }, { "epoch": 0.6680399692544197, "grad_norm": 0.3784293234348297, "learning_rate": 2.7234669455049834e-05, "loss": 2.7312, "step": 8148 }, { "epoch": 0.668203945682808, "grad_norm": 0.3668130040168762, "learning_rate": 2.7210533086579005e-05, "loss": 2.7087, "step": 8150 }, { "epoch": 0.6683679221111966, "grad_norm": 0.3724210262298584, "learning_rate": 2.718640341929734e-05, "loss": 2.7433, "step": 8152 }, { "epoch": 0.668531898539585, "grad_norm": 0.37186020612716675, "learning_rate": 2.7162280460300127e-05, "loss": 2.7301, "step": 8154 }, { "epoch": 0.6686958749679733, "grad_norm": 0.376110315322876, "learning_rate": 2.7138164216680662e-05, "loss": 2.6791, "step": 8156 }, { "epoch": 0.6688598513963617, "grad_norm": 0.36955568194389343, "learning_rate": 2.711405469553029e-05, "loss": 2.6705, "step": 8158 }, { "epoch": 0.6690238278247502, "grad_norm": 0.3734026551246643, "learning_rate": 2.70899519039383e-05, "loss": 2.6618, "step": 8160 }, { "epoch": 0.6691878042531386, "grad_norm": 0.38206690549850464, "learning_rate": 2.706585584899214e-05, "loss": 2.7474, "step": 8162 }, { "epoch": 0.669351780681527, "grad_norm": 0.40900060534477234, "learning_rate": 2.7041766537777145e-05, "loss": 2.749, "step": 8164 }, { "epoch": 0.6695157571099154, "grad_norm": 0.3761042654514313, "learning_rate": 2.701768397737674e-05, "loss": 2.6532, "step": 8166 }, { "epoch": 0.6696797335383039, "grad_norm": 0.361674964427948, "learning_rate": 2.6993608174872355e-05, "loss": 2.7127, "step": 8168 }, { "epoch": 0.6698437099666923, "grad_norm": 0.37260809540748596, "learning_rate": 2.696953913734339e-05, "loss": 2.6408, "step": 8170 }, { "epoch": 0.6700076863950807, "grad_norm": 0.3890365660190582, "learning_rate": 2.6945476871867302e-05, "loss": 2.6665, "step": 8172 }, { "epoch": 0.6701716628234691, "grad_norm": 0.35239818692207336, "learning_rate": 2.6921421385519562e-05, "loss": 2.6262, "step": 8174 }, { "epoch": 0.6703356392518576, "grad_norm": 0.368145227432251, "learning_rate": 2.6897372685373634e-05, "loss": 2.7065, "step": 8176 }, { "epoch": 0.670499615680246, "grad_norm": 0.37635675072669983, "learning_rate": 2.6873330778500915e-05, "loss": 2.6372, "step": 8178 }, { "epoch": 0.6706635921086344, "grad_norm": 0.3594423830509186, "learning_rate": 2.684929567197097e-05, "loss": 2.757, "step": 8180 }, { "epoch": 0.6708275685370229, "grad_norm": 0.39035844802856445, "learning_rate": 2.6825267372851193e-05, "loss": 2.6989, "step": 8182 }, { "epoch": 0.6709915449654112, "grad_norm": 0.36963799595832825, "learning_rate": 2.6801245888207076e-05, "loss": 2.6732, "step": 8184 }, { "epoch": 0.6711555213937996, "grad_norm": 0.4206686317920685, "learning_rate": 2.6777231225102072e-05, "loss": 2.701, "step": 8186 }, { "epoch": 0.671319497822188, "grad_norm": 0.36891210079193115, "learning_rate": 2.6753223390597666e-05, "loss": 2.7115, "step": 8188 }, { "epoch": 0.6714834742505765, "grad_norm": 0.36788102984428406, "learning_rate": 2.6729222391753255e-05, "loss": 2.6402, "step": 8190 }, { "epoch": 0.6716474506789649, "grad_norm": 0.3817219138145447, "learning_rate": 2.67052282356263e-05, "loss": 2.7658, "step": 8192 }, { "epoch": 0.6718114271073533, "grad_norm": 0.35640057921409607, "learning_rate": 2.6681240929272254e-05, "loss": 2.7745, "step": 8194 }, { "epoch": 0.6719754035357417, "grad_norm": 0.3562081754207611, "learning_rate": 2.6657260479744462e-05, "loss": 2.664, "step": 8196 }, { "epoch": 0.6721393799641302, "grad_norm": 0.37609589099884033, "learning_rate": 2.663328689409439e-05, "loss": 2.6658, "step": 8198 }, { "epoch": 0.6723033563925186, "grad_norm": 0.39180171489715576, "learning_rate": 2.6609320179371367e-05, "loss": 2.708, "step": 8200 }, { "epoch": 0.672467332820907, "grad_norm": 0.36664754152297974, "learning_rate": 2.6585360342622766e-05, "loss": 2.651, "step": 8202 }, { "epoch": 0.6726313092492954, "grad_norm": 0.38205891847610474, "learning_rate": 2.6561407390893916e-05, "loss": 2.692, "step": 8204 }, { "epoch": 0.6727952856776839, "grad_norm": 0.4301818013191223, "learning_rate": 2.6537461331228153e-05, "loss": 2.7361, "step": 8206 }, { "epoch": 0.6729592621060723, "grad_norm": 0.39791837334632874, "learning_rate": 2.6513522170666717e-05, "loss": 2.7151, "step": 8208 }, { "epoch": 0.6731232385344607, "grad_norm": 0.3955062925815582, "learning_rate": 2.648958991624888e-05, "loss": 2.7527, "step": 8210 }, { "epoch": 0.673287214962849, "grad_norm": 0.3791442811489105, "learning_rate": 2.6465664575011868e-05, "loss": 2.7517, "step": 8212 }, { "epoch": 0.6734511913912375, "grad_norm": 0.3849659264087677, "learning_rate": 2.6441746153990865e-05, "loss": 2.6752, "step": 8214 }, { "epoch": 0.6736151678196259, "grad_norm": 0.3792452812194824, "learning_rate": 2.6417834660219054e-05, "loss": 2.6883, "step": 8216 }, { "epoch": 0.6737791442480143, "grad_norm": 0.3837055265903473, "learning_rate": 2.6393930100727515e-05, "loss": 2.6416, "step": 8218 }, { "epoch": 0.6739431206764027, "grad_norm": 0.38943207263946533, "learning_rate": 2.6370032482545337e-05, "loss": 2.7423, "step": 8220 }, { "epoch": 0.6741070971047912, "grad_norm": 0.36709269881248474, "learning_rate": 2.6346141812699572e-05, "loss": 2.7491, "step": 8222 }, { "epoch": 0.6742710735331796, "grad_norm": 0.42113515734672546, "learning_rate": 2.6322258098215224e-05, "loss": 2.6578, "step": 8224 }, { "epoch": 0.674435049961568, "grad_norm": 0.411876380443573, "learning_rate": 2.6298381346115186e-05, "loss": 2.7246, "step": 8226 }, { "epoch": 0.6745990263899564, "grad_norm": 0.4018315076828003, "learning_rate": 2.6274511563420445e-05, "loss": 2.6599, "step": 8228 }, { "epoch": 0.6747630028183449, "grad_norm": 0.41591763496398926, "learning_rate": 2.625064875714979e-05, "loss": 2.699, "step": 8230 }, { "epoch": 0.6749269792467333, "grad_norm": 0.4027625620365143, "learning_rate": 2.6226792934320044e-05, "loss": 2.7217, "step": 8232 }, { "epoch": 0.6750909556751217, "grad_norm": 0.37466877698898315, "learning_rate": 2.6202944101945968e-05, "loss": 2.68, "step": 8234 }, { "epoch": 0.6752549321035102, "grad_norm": 0.38026630878448486, "learning_rate": 2.6179102267040235e-05, "loss": 2.6948, "step": 8236 }, { "epoch": 0.6754189085318986, "grad_norm": 0.39000222086906433, "learning_rate": 2.6155267436613483e-05, "loss": 2.7283, "step": 8238 }, { "epoch": 0.675582884960287, "grad_norm": 0.3974981904029846, "learning_rate": 2.6131439617674303e-05, "loss": 2.6694, "step": 8240 }, { "epoch": 0.6757468613886753, "grad_norm": 0.38007107377052307, "learning_rate": 2.6107618817229212e-05, "loss": 2.6856, "step": 8242 }, { "epoch": 0.6759108378170638, "grad_norm": 0.3747228980064392, "learning_rate": 2.608380504228261e-05, "loss": 2.691, "step": 8244 }, { "epoch": 0.6760748142454522, "grad_norm": 0.3980855941772461, "learning_rate": 2.6059998299836957e-05, "loss": 2.7035, "step": 8246 }, { "epoch": 0.6762387906738406, "grad_norm": 0.37124818563461304, "learning_rate": 2.6036198596892515e-05, "loss": 2.6648, "step": 8248 }, { "epoch": 0.676402767102229, "grad_norm": 0.38610053062438965, "learning_rate": 2.6012405940447548e-05, "loss": 2.7189, "step": 8250 }, { "epoch": 0.6765667435306175, "grad_norm": 0.38325104117393494, "learning_rate": 2.5988620337498233e-05, "loss": 2.7064, "step": 8252 }, { "epoch": 0.6767307199590059, "grad_norm": 0.3818075358867645, "learning_rate": 2.5964841795038685e-05, "loss": 2.7091, "step": 8254 }, { "epoch": 0.6768946963873943, "grad_norm": 0.3741518259048462, "learning_rate": 2.5941070320060895e-05, "loss": 2.6557, "step": 8256 }, { "epoch": 0.6770586728157827, "grad_norm": 0.3564099967479706, "learning_rate": 2.5917305919554824e-05, "loss": 2.632, "step": 8258 }, { "epoch": 0.6772226492441712, "grad_norm": 0.3514571487903595, "learning_rate": 2.589354860050836e-05, "loss": 2.7041, "step": 8260 }, { "epoch": 0.6773866256725596, "grad_norm": 0.38728657364845276, "learning_rate": 2.586979836990723e-05, "loss": 2.7169, "step": 8262 }, { "epoch": 0.677550602100948, "grad_norm": 0.38779333233833313, "learning_rate": 2.584605523473521e-05, "loss": 2.7581, "step": 8264 }, { "epoch": 0.6777145785293364, "grad_norm": 0.38863226771354675, "learning_rate": 2.5822319201973855e-05, "loss": 2.7207, "step": 8266 }, { "epoch": 0.6778785549577249, "grad_norm": 0.38649144768714905, "learning_rate": 2.579859027860271e-05, "loss": 2.6717, "step": 8268 }, { "epoch": 0.6780425313861133, "grad_norm": 0.3792441785335541, "learning_rate": 2.5774868471599202e-05, "loss": 2.7173, "step": 8270 }, { "epoch": 0.6782065078145016, "grad_norm": 0.38163819909095764, "learning_rate": 2.5751153787938697e-05, "loss": 2.658, "step": 8272 }, { "epoch": 0.67837048424289, "grad_norm": 0.3687981069087982, "learning_rate": 2.5727446234594387e-05, "loss": 2.7493, "step": 8274 }, { "epoch": 0.6785344606712785, "grad_norm": 0.39426374435424805, "learning_rate": 2.57037458185375e-05, "loss": 2.725, "step": 8276 }, { "epoch": 0.6786984370996669, "grad_norm": 0.36612024903297424, "learning_rate": 2.5680052546737027e-05, "loss": 2.709, "step": 8278 }, { "epoch": 0.6788624135280553, "grad_norm": 0.35995176434516907, "learning_rate": 2.565636642615994e-05, "loss": 2.7274, "step": 8280 }, { "epoch": 0.6790263899564437, "grad_norm": 0.3847874104976654, "learning_rate": 2.56326874637711e-05, "loss": 2.6745, "step": 8282 }, { "epoch": 0.6791903663848322, "grad_norm": 0.37445297837257385, "learning_rate": 2.5609015666533214e-05, "loss": 2.6481, "step": 8284 }, { "epoch": 0.6793543428132206, "grad_norm": 0.3875288963317871, "learning_rate": 2.558535104140694e-05, "loss": 2.7548, "step": 8286 }, { "epoch": 0.679518319241609, "grad_norm": 0.37544742226600647, "learning_rate": 2.5561693595350812e-05, "loss": 2.7183, "step": 8288 }, { "epoch": 0.6796822956699974, "grad_norm": 0.39676231145858765, "learning_rate": 2.5538043335321248e-05, "loss": 2.7058, "step": 8290 }, { "epoch": 0.6798462720983859, "grad_norm": 0.39057162404060364, "learning_rate": 2.5514400268272513e-05, "loss": 2.7093, "step": 8292 }, { "epoch": 0.6800102485267743, "grad_norm": 0.3795698285102844, "learning_rate": 2.5490764401156862e-05, "loss": 2.6772, "step": 8294 }, { "epoch": 0.6801742249551627, "grad_norm": 0.3786967992782593, "learning_rate": 2.5467135740924308e-05, "loss": 2.7112, "step": 8296 }, { "epoch": 0.6803382013835512, "grad_norm": 0.3857060670852661, "learning_rate": 2.544351429452283e-05, "loss": 2.6722, "step": 8298 }, { "epoch": 0.6805021778119396, "grad_norm": 0.36618056893348694, "learning_rate": 2.541990006889825e-05, "loss": 2.6766, "step": 8300 }, { "epoch": 0.680666154240328, "grad_norm": 0.39972805976867676, "learning_rate": 2.5396293070994297e-05, "loss": 2.7417, "step": 8302 }, { "epoch": 0.6808301306687163, "grad_norm": 0.40740692615509033, "learning_rate": 2.5372693307752514e-05, "loss": 2.6673, "step": 8304 }, { "epoch": 0.6809941070971048, "grad_norm": 0.39190182089805603, "learning_rate": 2.5349100786112377e-05, "loss": 2.6808, "step": 8306 }, { "epoch": 0.6811580835254932, "grad_norm": 0.4014313220977783, "learning_rate": 2.532551551301123e-05, "loss": 2.7375, "step": 8308 }, { "epoch": 0.6813220599538816, "grad_norm": 0.3573625087738037, "learning_rate": 2.530193749538422e-05, "loss": 2.6694, "step": 8310 }, { "epoch": 0.68148603638227, "grad_norm": 0.3863253891468048, "learning_rate": 2.5278366740164466e-05, "loss": 2.6942, "step": 8312 }, { "epoch": 0.6816500128106585, "grad_norm": 0.38585901260375977, "learning_rate": 2.5254803254282844e-05, "loss": 2.6705, "step": 8314 }, { "epoch": 0.6818139892390469, "grad_norm": 0.3723987936973572, "learning_rate": 2.5231247044668166e-05, "loss": 2.7009, "step": 8316 }, { "epoch": 0.6819779656674353, "grad_norm": 0.4075051546096802, "learning_rate": 2.520769811824707e-05, "loss": 2.6906, "step": 8318 }, { "epoch": 0.6821419420958237, "grad_norm": 0.38526657223701477, "learning_rate": 2.5184156481944086e-05, "loss": 2.6751, "step": 8320 }, { "epoch": 0.6823059185242122, "grad_norm": 0.36364448070526123, "learning_rate": 2.5160622142681546e-05, "loss": 2.6927, "step": 8322 }, { "epoch": 0.6824698949526006, "grad_norm": 0.370485782623291, "learning_rate": 2.513709510737967e-05, "loss": 2.6772, "step": 8324 }, { "epoch": 0.682633871380989, "grad_norm": 0.3559088408946991, "learning_rate": 2.5113575382956546e-05, "loss": 2.6345, "step": 8326 }, { "epoch": 0.6827978478093774, "grad_norm": 0.40118610858917236, "learning_rate": 2.5090062976328076e-05, "loss": 2.7016, "step": 8328 }, { "epoch": 0.6829618242377659, "grad_norm": 0.37945353984832764, "learning_rate": 2.506655789440806e-05, "loss": 2.6757, "step": 8330 }, { "epoch": 0.6831258006661542, "grad_norm": 0.3832918703556061, "learning_rate": 2.5043060144108078e-05, "loss": 2.7603, "step": 8332 }, { "epoch": 0.6832897770945426, "grad_norm": 0.3587692677974701, "learning_rate": 2.501956973233759e-05, "loss": 2.7212, "step": 8334 }, { "epoch": 0.683453753522931, "grad_norm": 0.3815983235836029, "learning_rate": 2.4996086666003898e-05, "loss": 2.7249, "step": 8336 }, { "epoch": 0.6836177299513195, "grad_norm": 0.3645575940608978, "learning_rate": 2.497261095201218e-05, "loss": 2.721, "step": 8338 }, { "epoch": 0.6837817063797079, "grad_norm": 0.38928934931755066, "learning_rate": 2.494914259726534e-05, "loss": 2.6733, "step": 8340 }, { "epoch": 0.6839456828080963, "grad_norm": 0.37325677275657654, "learning_rate": 2.492568160866428e-05, "loss": 2.7085, "step": 8342 }, { "epoch": 0.6841096592364847, "grad_norm": 0.3798007369041443, "learning_rate": 2.490222799310758e-05, "loss": 2.658, "step": 8344 }, { "epoch": 0.6842736356648732, "grad_norm": 0.3864426612854004, "learning_rate": 2.4878781757491754e-05, "loss": 2.7088, "step": 8346 }, { "epoch": 0.6844376120932616, "grad_norm": 0.3895781338214874, "learning_rate": 2.4855342908711116e-05, "loss": 2.6388, "step": 8348 }, { "epoch": 0.68460158852165, "grad_norm": 0.3879987299442291, "learning_rate": 2.4831911453657774e-05, "loss": 2.7558, "step": 8350 }, { "epoch": 0.6847655649500385, "grad_norm": 0.3584182560443878, "learning_rate": 2.4808487399221715e-05, "loss": 2.701, "step": 8352 }, { "epoch": 0.6849295413784269, "grad_norm": 0.3610781133174896, "learning_rate": 2.4785070752290722e-05, "loss": 2.7193, "step": 8354 }, { "epoch": 0.6850935178068153, "grad_norm": 0.40857601165771484, "learning_rate": 2.476166151975042e-05, "loss": 2.698, "step": 8356 }, { "epoch": 0.6852574942352037, "grad_norm": 0.3872397840023041, "learning_rate": 2.4738259708484196e-05, "loss": 2.7307, "step": 8358 }, { "epoch": 0.6854214706635922, "grad_norm": 0.36924323439598083, "learning_rate": 2.471486532537336e-05, "loss": 2.6702, "step": 8360 }, { "epoch": 0.6855854470919805, "grad_norm": 0.3698207437992096, "learning_rate": 2.4691478377296924e-05, "loss": 2.6859, "step": 8362 }, { "epoch": 0.6857494235203689, "grad_norm": 0.3924945592880249, "learning_rate": 2.466809887113178e-05, "loss": 2.6889, "step": 8364 }, { "epoch": 0.6859133999487573, "grad_norm": 0.399661660194397, "learning_rate": 2.4644726813752622e-05, "loss": 2.6925, "step": 8366 }, { "epoch": 0.6860773763771458, "grad_norm": 0.3852911591529846, "learning_rate": 2.462136221203196e-05, "loss": 2.6954, "step": 8368 }, { "epoch": 0.6862413528055342, "grad_norm": 0.39803236722946167, "learning_rate": 2.4598005072840074e-05, "loss": 2.7448, "step": 8370 }, { "epoch": 0.6864053292339226, "grad_norm": 0.39910784363746643, "learning_rate": 2.4574655403045087e-05, "loss": 2.6747, "step": 8372 }, { "epoch": 0.686569305662311, "grad_norm": 0.3956886827945709, "learning_rate": 2.4551313209512917e-05, "loss": 2.704, "step": 8374 }, { "epoch": 0.6867332820906995, "grad_norm": 0.39390331506729126, "learning_rate": 2.452797849910728e-05, "loss": 2.6991, "step": 8376 }, { "epoch": 0.6868972585190879, "grad_norm": 0.3898649513721466, "learning_rate": 2.450465127868971e-05, "loss": 2.6889, "step": 8378 }, { "epoch": 0.6870612349474763, "grad_norm": 0.3891480565071106, "learning_rate": 2.4481331555119485e-05, "loss": 2.6686, "step": 8380 }, { "epoch": 0.6872252113758647, "grad_norm": 0.3815426826477051, "learning_rate": 2.4458019335253734e-05, "loss": 2.7233, "step": 8382 }, { "epoch": 0.6873891878042532, "grad_norm": 0.406530499458313, "learning_rate": 2.4434714625947363e-05, "loss": 2.7453, "step": 8384 }, { "epoch": 0.6875531642326416, "grad_norm": 0.3807404041290283, "learning_rate": 2.441141743405308e-05, "loss": 2.6957, "step": 8386 }, { "epoch": 0.68771714066103, "grad_norm": 0.3976084589958191, "learning_rate": 2.438812776642132e-05, "loss": 2.69, "step": 8388 }, { "epoch": 0.6878811170894183, "grad_norm": 0.38501620292663574, "learning_rate": 2.4364845629900423e-05, "loss": 2.6682, "step": 8390 }, { "epoch": 0.6880450935178068, "grad_norm": 0.4026597738265991, "learning_rate": 2.4341571031336395e-05, "loss": 2.7205, "step": 8392 }, { "epoch": 0.6882090699461952, "grad_norm": 0.3994022607803345, "learning_rate": 2.4318303977573097e-05, "loss": 2.7379, "step": 8394 }, { "epoch": 0.6883730463745836, "grad_norm": 0.40519949793815613, "learning_rate": 2.4295044475452167e-05, "loss": 2.6898, "step": 8396 }, { "epoch": 0.688537022802972, "grad_norm": 0.36324363946914673, "learning_rate": 2.427179253181297e-05, "loss": 2.7469, "step": 8398 }, { "epoch": 0.6887009992313605, "grad_norm": 0.38369739055633545, "learning_rate": 2.4248548153492702e-05, "loss": 2.677, "step": 8400 }, { "epoch": 0.6888649756597489, "grad_norm": 0.4025323987007141, "learning_rate": 2.4225311347326317e-05, "loss": 2.7053, "step": 8402 }, { "epoch": 0.6890289520881373, "grad_norm": 0.3958849608898163, "learning_rate": 2.4202082120146573e-05, "loss": 2.7344, "step": 8404 }, { "epoch": 0.6891929285165258, "grad_norm": 0.37935763597488403, "learning_rate": 2.4178860478783903e-05, "loss": 2.6673, "step": 8406 }, { "epoch": 0.6893569049449142, "grad_norm": 0.40843576192855835, "learning_rate": 2.4155646430066653e-05, "loss": 2.7021, "step": 8408 }, { "epoch": 0.6895208813733026, "grad_norm": 0.43301036953926086, "learning_rate": 2.4132439980820805e-05, "loss": 2.7339, "step": 8410 }, { "epoch": 0.689684857801691, "grad_norm": 0.3878119885921478, "learning_rate": 2.4109241137870176e-05, "loss": 2.7106, "step": 8412 }, { "epoch": 0.6898488342300795, "grad_norm": 0.3935549855232239, "learning_rate": 2.4086049908036335e-05, "loss": 2.7239, "step": 8414 }, { "epoch": 0.6900128106584679, "grad_norm": 0.3892782926559448, "learning_rate": 2.4062866298138626e-05, "loss": 2.7227, "step": 8416 }, { "epoch": 0.6901767870868563, "grad_norm": 0.40126466751098633, "learning_rate": 2.4039690314994096e-05, "loss": 2.6684, "step": 8418 }, { "epoch": 0.6903407635152446, "grad_norm": 0.40310603380203247, "learning_rate": 2.401652196541761e-05, "loss": 2.7113, "step": 8420 }, { "epoch": 0.6905047399436332, "grad_norm": 0.3945203423500061, "learning_rate": 2.3993361256221784e-05, "loss": 2.7171, "step": 8422 }, { "epoch": 0.6906687163720215, "grad_norm": 0.39180508255958557, "learning_rate": 2.3970208194216914e-05, "loss": 2.7298, "step": 8424 }, { "epoch": 0.6908326928004099, "grad_norm": 0.38912150263786316, "learning_rate": 2.3947062786211183e-05, "loss": 2.6408, "step": 8426 }, { "epoch": 0.6909966692287983, "grad_norm": 0.37896761298179626, "learning_rate": 2.3923925039010375e-05, "loss": 2.7209, "step": 8428 }, { "epoch": 0.6911606456571868, "grad_norm": 0.38243967294692993, "learning_rate": 2.3900794959418123e-05, "loss": 2.6873, "step": 8430 }, { "epoch": 0.6913246220855752, "grad_norm": 0.3906749486923218, "learning_rate": 2.3877672554235765e-05, "loss": 2.6535, "step": 8432 }, { "epoch": 0.6914885985139636, "grad_norm": 0.3702877461910248, "learning_rate": 2.385455783026241e-05, "loss": 2.6838, "step": 8434 }, { "epoch": 0.691652574942352, "grad_norm": 0.42546260356903076, "learning_rate": 2.383145079429483e-05, "loss": 2.7067, "step": 8436 }, { "epoch": 0.6918165513707405, "grad_norm": 0.38497427105903625, "learning_rate": 2.380835145312767e-05, "loss": 2.6411, "step": 8438 }, { "epoch": 0.6919805277991289, "grad_norm": 0.3927631378173828, "learning_rate": 2.3785259813553186e-05, "loss": 2.66, "step": 8440 }, { "epoch": 0.6921445042275173, "grad_norm": 0.40803998708724976, "learning_rate": 2.3762175882361437e-05, "loss": 2.7613, "step": 8442 }, { "epoch": 0.6923084806559057, "grad_norm": 0.3995896875858307, "learning_rate": 2.3739099666340215e-05, "loss": 2.6944, "step": 8444 }, { "epoch": 0.6924724570842942, "grad_norm": 0.39971452951431274, "learning_rate": 2.3716031172274988e-05, "loss": 2.6926, "step": 8446 }, { "epoch": 0.6926364335126826, "grad_norm": 0.3683062195777893, "learning_rate": 2.3692970406949017e-05, "loss": 2.7307, "step": 8448 }, { "epoch": 0.692800409941071, "grad_norm": 0.38081446290016174, "learning_rate": 2.3669917377143258e-05, "loss": 2.7179, "step": 8450 }, { "epoch": 0.6929643863694593, "grad_norm": 0.39257097244262695, "learning_rate": 2.364687208963643e-05, "loss": 2.7109, "step": 8452 }, { "epoch": 0.6931283627978478, "grad_norm": 0.38061755895614624, "learning_rate": 2.3623834551204876e-05, "loss": 2.7346, "step": 8454 }, { "epoch": 0.6932923392262362, "grad_norm": 0.3543435037136078, "learning_rate": 2.360080476862282e-05, "loss": 2.7067, "step": 8456 }, { "epoch": 0.6934563156546246, "grad_norm": 0.3647612929344177, "learning_rate": 2.357778274866204e-05, "loss": 2.6797, "step": 8458 }, { "epoch": 0.693620292083013, "grad_norm": 0.36694076657295227, "learning_rate": 2.355476849809215e-05, "loss": 2.7077, "step": 8460 }, { "epoch": 0.6937842685114015, "grad_norm": 0.35562363266944885, "learning_rate": 2.3531762023680415e-05, "loss": 2.7323, "step": 8462 }, { "epoch": 0.6939482449397899, "grad_norm": 0.34998953342437744, "learning_rate": 2.3508763332191864e-05, "loss": 2.749, "step": 8464 }, { "epoch": 0.6941122213681783, "grad_norm": 0.3599771559238434, "learning_rate": 2.3485772430389168e-05, "loss": 2.6812, "step": 8466 }, { "epoch": 0.6942761977965668, "grad_norm": 0.3789721429347992, "learning_rate": 2.3462789325032764e-05, "loss": 2.6888, "step": 8468 }, { "epoch": 0.6944401742249552, "grad_norm": 0.3616909086704254, "learning_rate": 2.3439814022880803e-05, "loss": 2.7342, "step": 8470 }, { "epoch": 0.6946041506533436, "grad_norm": 0.376828134059906, "learning_rate": 2.3416846530689064e-05, "loss": 2.683, "step": 8472 }, { "epoch": 0.694768127081732, "grad_norm": 0.37655776739120483, "learning_rate": 2.339388685521115e-05, "loss": 2.6618, "step": 8474 }, { "epoch": 0.6949321035101205, "grad_norm": 0.3913043141365051, "learning_rate": 2.3370935003198253e-05, "loss": 2.7671, "step": 8476 }, { "epoch": 0.6950960799385089, "grad_norm": 0.3889150023460388, "learning_rate": 2.3347990981399325e-05, "loss": 2.7254, "step": 8478 }, { "epoch": 0.6952600563668972, "grad_norm": 0.3825925588607788, "learning_rate": 2.3325054796561007e-05, "loss": 2.7199, "step": 8480 }, { "epoch": 0.6954240327952856, "grad_norm": 0.3712370991706848, "learning_rate": 2.3302126455427643e-05, "loss": 2.6727, "step": 8482 }, { "epoch": 0.6955880092236741, "grad_norm": 0.3657374680042267, "learning_rate": 2.327920596474122e-05, "loss": 2.7316, "step": 8484 }, { "epoch": 0.6957519856520625, "grad_norm": 0.36833930015563965, "learning_rate": 2.3256293331241474e-05, "loss": 2.7217, "step": 8486 }, { "epoch": 0.6959159620804509, "grad_norm": 0.37160244584083557, "learning_rate": 2.3233388561665813e-05, "loss": 2.7181, "step": 8488 }, { "epoch": 0.6960799385088393, "grad_norm": 0.38849589228630066, "learning_rate": 2.3210491662749335e-05, "loss": 2.7136, "step": 8490 }, { "epoch": 0.6962439149372278, "grad_norm": 0.3978149890899658, "learning_rate": 2.3187602641224826e-05, "loss": 2.6372, "step": 8492 }, { "epoch": 0.6964078913656162, "grad_norm": 0.385545551776886, "learning_rate": 2.316472150382272e-05, "loss": 2.658, "step": 8494 }, { "epoch": 0.6965718677940046, "grad_norm": 0.3721058666706085, "learning_rate": 2.314184825727118e-05, "loss": 2.657, "step": 8496 }, { "epoch": 0.696735844222393, "grad_norm": 0.36420297622680664, "learning_rate": 2.3118982908296032e-05, "loss": 2.7071, "step": 8498 }, { "epoch": 0.6968998206507815, "grad_norm": 0.3748766779899597, "learning_rate": 2.3096125463620793e-05, "loss": 2.7168, "step": 8500 }, { "epoch": 0.6970637970791699, "grad_norm": 0.3913448452949524, "learning_rate": 2.3073275929966587e-05, "loss": 2.6669, "step": 8502 }, { "epoch": 0.6972277735075583, "grad_norm": 0.4111196994781494, "learning_rate": 2.3050434314052337e-05, "loss": 2.7442, "step": 8504 }, { "epoch": 0.6973917499359467, "grad_norm": 0.3807835280895233, "learning_rate": 2.3027600622594515e-05, "loss": 2.69, "step": 8506 }, { "epoch": 0.6975557263643352, "grad_norm": 0.37266266345977783, "learning_rate": 2.3004774862307326e-05, "loss": 2.7033, "step": 8508 }, { "epoch": 0.6977197027927236, "grad_norm": 0.3839258551597595, "learning_rate": 2.298195703990266e-05, "loss": 2.7077, "step": 8510 }, { "epoch": 0.6978836792211119, "grad_norm": 0.3883345127105713, "learning_rate": 2.295914716209e-05, "loss": 2.6701, "step": 8512 }, { "epoch": 0.6980476556495003, "grad_norm": 0.3678521513938904, "learning_rate": 2.293634523557655e-05, "loss": 2.7117, "step": 8514 }, { "epoch": 0.6982116320778888, "grad_norm": 0.37389418482780457, "learning_rate": 2.291355126706718e-05, "loss": 2.7516, "step": 8516 }, { "epoch": 0.6983756085062772, "grad_norm": 0.3578051030635834, "learning_rate": 2.2890765263264408e-05, "loss": 2.6889, "step": 8518 }, { "epoch": 0.6985395849346656, "grad_norm": 0.3696204125881195, "learning_rate": 2.286798723086835e-05, "loss": 2.6647, "step": 8520 }, { "epoch": 0.6987035613630541, "grad_norm": 0.36636796593666077, "learning_rate": 2.2845217176576912e-05, "loss": 2.6858, "step": 8522 }, { "epoch": 0.6988675377914425, "grad_norm": 0.39986589550971985, "learning_rate": 2.2822455107085518e-05, "loss": 2.695, "step": 8524 }, { "epoch": 0.6990315142198309, "grad_norm": 0.3809782862663269, "learning_rate": 2.2799701029087312e-05, "loss": 2.6948, "step": 8526 }, { "epoch": 0.6991954906482193, "grad_norm": 0.39051175117492676, "learning_rate": 2.277695494927309e-05, "loss": 2.7204, "step": 8528 }, { "epoch": 0.6993594670766078, "grad_norm": 0.36137253046035767, "learning_rate": 2.275421687433129e-05, "loss": 2.6864, "step": 8530 }, { "epoch": 0.6995234435049962, "grad_norm": 0.3832628130912781, "learning_rate": 2.273148681094796e-05, "loss": 2.7347, "step": 8532 }, { "epoch": 0.6996874199333846, "grad_norm": 0.3860900402069092, "learning_rate": 2.2708764765806838e-05, "loss": 2.7036, "step": 8534 }, { "epoch": 0.699851396361773, "grad_norm": 0.37296178936958313, "learning_rate": 2.268605074558931e-05, "loss": 2.698, "step": 8536 }, { "epoch": 0.7000153727901615, "grad_norm": 0.3842230439186096, "learning_rate": 2.2663344756974318e-05, "loss": 2.679, "step": 8538 }, { "epoch": 0.7001793492185499, "grad_norm": 0.376590371131897, "learning_rate": 2.2640646806638584e-05, "loss": 2.7453, "step": 8540 }, { "epoch": 0.7003433256469382, "grad_norm": 0.4014396369457245, "learning_rate": 2.2617956901256326e-05, "loss": 2.6497, "step": 8542 }, { "epoch": 0.7005073020753266, "grad_norm": 0.3732002377510071, "learning_rate": 2.2595275047499482e-05, "loss": 2.7022, "step": 8544 }, { "epoch": 0.7006712785037151, "grad_norm": 0.3643403649330139, "learning_rate": 2.2572601252037595e-05, "loss": 2.7281, "step": 8546 }, { "epoch": 0.7008352549321035, "grad_norm": 0.3596980571746826, "learning_rate": 2.254993552153786e-05, "loss": 2.6838, "step": 8548 }, { "epoch": 0.7009992313604919, "grad_norm": 0.35688257217407227, "learning_rate": 2.252727786266502e-05, "loss": 2.6995, "step": 8550 }, { "epoch": 0.7011632077888803, "grad_norm": 0.3599395453929901, "learning_rate": 2.250462828208159e-05, "loss": 2.6761, "step": 8552 }, { "epoch": 0.7013271842172688, "grad_norm": 0.3549034297466278, "learning_rate": 2.2481986786447568e-05, "loss": 2.6499, "step": 8554 }, { "epoch": 0.7014911606456572, "grad_norm": 0.36290597915649414, "learning_rate": 2.245935338242065e-05, "loss": 2.7377, "step": 8556 }, { "epoch": 0.7016551370740456, "grad_norm": 0.38815751671791077, "learning_rate": 2.2436728076656155e-05, "loss": 2.6821, "step": 8558 }, { "epoch": 0.701819113502434, "grad_norm": 0.3571638762950897, "learning_rate": 2.2414110875806958e-05, "loss": 2.7128, "step": 8560 }, { "epoch": 0.7019830899308225, "grad_norm": 0.36378180980682373, "learning_rate": 2.239150178652362e-05, "loss": 2.6604, "step": 8562 }, { "epoch": 0.7021470663592109, "grad_norm": 0.3422406315803528, "learning_rate": 2.2368900815454287e-05, "loss": 2.7672, "step": 8564 }, { "epoch": 0.7023110427875993, "grad_norm": 0.3636515438556671, "learning_rate": 2.234630796924474e-05, "loss": 2.6835, "step": 8566 }, { "epoch": 0.7024750192159877, "grad_norm": 0.35900112986564636, "learning_rate": 2.2323723254538297e-05, "loss": 2.7209, "step": 8568 }, { "epoch": 0.7026389956443762, "grad_norm": 0.3729889988899231, "learning_rate": 2.2301146677976015e-05, "loss": 2.6812, "step": 8570 }, { "epoch": 0.7028029720727645, "grad_norm": 0.3816934823989868, "learning_rate": 2.2278578246196425e-05, "loss": 2.6851, "step": 8572 }, { "epoch": 0.7029669485011529, "grad_norm": 0.3625105321407318, "learning_rate": 2.2256017965835747e-05, "loss": 2.6295, "step": 8574 }, { "epoch": 0.7031309249295413, "grad_norm": 0.36865508556365967, "learning_rate": 2.2233465843527766e-05, "loss": 2.7283, "step": 8576 }, { "epoch": 0.7032949013579298, "grad_norm": 0.37290316820144653, "learning_rate": 2.221092188590391e-05, "loss": 2.7066, "step": 8578 }, { "epoch": 0.7034588777863182, "grad_norm": 0.36878013610839844, "learning_rate": 2.218838609959314e-05, "loss": 2.6875, "step": 8580 }, { "epoch": 0.7036228542147066, "grad_norm": 0.3616272211074829, "learning_rate": 2.216585849122206e-05, "loss": 2.6473, "step": 8582 }, { "epoch": 0.7037868306430951, "grad_norm": 0.3588869869709015, "learning_rate": 2.2143339067414887e-05, "loss": 2.6438, "step": 8584 }, { "epoch": 0.7039508070714835, "grad_norm": 0.3697204291820526, "learning_rate": 2.2120827834793345e-05, "loss": 2.682, "step": 8586 }, { "epoch": 0.7041147834998719, "grad_norm": 0.3795563578605652, "learning_rate": 2.2098324799976883e-05, "loss": 2.6897, "step": 8588 }, { "epoch": 0.7042787599282603, "grad_norm": 0.39331692457199097, "learning_rate": 2.207582996958242e-05, "loss": 2.6888, "step": 8590 }, { "epoch": 0.7044427363566488, "grad_norm": 0.39076825976371765, "learning_rate": 2.205334335022451e-05, "loss": 2.7245, "step": 8592 }, { "epoch": 0.7046067127850372, "grad_norm": 0.36164453625679016, "learning_rate": 2.20308649485153e-05, "loss": 2.7087, "step": 8594 }, { "epoch": 0.7047706892134256, "grad_norm": 0.36285480856895447, "learning_rate": 2.200839477106453e-05, "loss": 2.7544, "step": 8596 }, { "epoch": 0.704934665641814, "grad_norm": 0.40612390637397766, "learning_rate": 2.198593282447946e-05, "loss": 2.701, "step": 8598 }, { "epoch": 0.7050986420702025, "grad_norm": 0.3629796802997589, "learning_rate": 2.1963479115365e-05, "loss": 2.7245, "step": 8600 }, { "epoch": 0.7052626184985908, "grad_norm": 0.4071548581123352, "learning_rate": 2.1941033650323605e-05, "loss": 2.6904, "step": 8602 }, { "epoch": 0.7054265949269792, "grad_norm": 0.41032174229621887, "learning_rate": 2.1918596435955315e-05, "loss": 2.7452, "step": 8604 }, { "epoch": 0.7055905713553676, "grad_norm": 0.35707178711891174, "learning_rate": 2.189616747885775e-05, "loss": 2.6882, "step": 8606 }, { "epoch": 0.7057545477837561, "grad_norm": 0.37731117010116577, "learning_rate": 2.1873746785626063e-05, "loss": 2.6776, "step": 8608 }, { "epoch": 0.7059185242121445, "grad_norm": 0.3739245533943176, "learning_rate": 2.1851334362853023e-05, "loss": 2.7326, "step": 8610 }, { "epoch": 0.7060825006405329, "grad_norm": 0.3722072243690491, "learning_rate": 2.1828930217128947e-05, "loss": 2.7326, "step": 8612 }, { "epoch": 0.7062464770689213, "grad_norm": 0.37407660484313965, "learning_rate": 2.1806534355041746e-05, "loss": 2.6822, "step": 8614 }, { "epoch": 0.7064104534973098, "grad_norm": 0.3598092794418335, "learning_rate": 2.1784146783176807e-05, "loss": 2.6872, "step": 8616 }, { "epoch": 0.7065744299256982, "grad_norm": 0.34601491689682007, "learning_rate": 2.1761767508117225e-05, "loss": 2.6579, "step": 8618 }, { "epoch": 0.7067384063540866, "grad_norm": 0.3648231625556946, "learning_rate": 2.1739396536443512e-05, "loss": 2.7005, "step": 8620 }, { "epoch": 0.706902382782475, "grad_norm": 0.3847854733467102, "learning_rate": 2.1717033874733827e-05, "loss": 2.6606, "step": 8622 }, { "epoch": 0.7070663592108635, "grad_norm": 0.37499886751174927, "learning_rate": 2.169467952956386e-05, "loss": 2.6549, "step": 8624 }, { "epoch": 0.7072303356392519, "grad_norm": 0.37262287735939026, "learning_rate": 2.1672333507506837e-05, "loss": 2.7011, "step": 8626 }, { "epoch": 0.7073943120676403, "grad_norm": 0.3709699213504791, "learning_rate": 2.1649995815133555e-05, "loss": 2.6961, "step": 8628 }, { "epoch": 0.7075582884960286, "grad_norm": 0.3597058951854706, "learning_rate": 2.1627666459012365e-05, "loss": 2.6823, "step": 8630 }, { "epoch": 0.7077222649244171, "grad_norm": 0.37352320551872253, "learning_rate": 2.1605345445709185e-05, "loss": 2.6511, "step": 8632 }, { "epoch": 0.7078862413528055, "grad_norm": 0.357758492231369, "learning_rate": 2.1583032781787393e-05, "loss": 2.6536, "step": 8634 }, { "epoch": 0.7080502177811939, "grad_norm": 0.36342495679855347, "learning_rate": 2.156072847380806e-05, "loss": 2.6883, "step": 8636 }, { "epoch": 0.7082141942095824, "grad_norm": 0.36827418208122253, "learning_rate": 2.153843252832965e-05, "loss": 2.6701, "step": 8638 }, { "epoch": 0.7083781706379708, "grad_norm": 0.35471510887145996, "learning_rate": 2.1516144951908256e-05, "loss": 2.6893, "step": 8640 }, { "epoch": 0.7085421470663592, "grad_norm": 0.3700093924999237, "learning_rate": 2.1493865751097486e-05, "loss": 2.6763, "step": 8642 }, { "epoch": 0.7087061234947476, "grad_norm": 0.3778194189071655, "learning_rate": 2.147159493244851e-05, "loss": 2.7109, "step": 8644 }, { "epoch": 0.7088700999231361, "grad_norm": 0.36272796988487244, "learning_rate": 2.1449332502509966e-05, "loss": 2.6617, "step": 8646 }, { "epoch": 0.7090340763515245, "grad_norm": 0.34045878052711487, "learning_rate": 2.1427078467828094e-05, "loss": 2.6436, "step": 8648 }, { "epoch": 0.7091980527799129, "grad_norm": 0.3516055941581726, "learning_rate": 2.1404832834946647e-05, "loss": 2.7442, "step": 8650 }, { "epoch": 0.7093620292083013, "grad_norm": 0.3637882471084595, "learning_rate": 2.1382595610406864e-05, "loss": 2.699, "step": 8652 }, { "epoch": 0.7095260056366898, "grad_norm": 0.38303273916244507, "learning_rate": 2.136036680074761e-05, "loss": 2.731, "step": 8654 }, { "epoch": 0.7096899820650782, "grad_norm": 0.34832051396369934, "learning_rate": 2.133814641250516e-05, "loss": 2.6747, "step": 8656 }, { "epoch": 0.7098539584934666, "grad_norm": 0.35098278522491455, "learning_rate": 2.1315934452213387e-05, "loss": 2.6564, "step": 8658 }, { "epoch": 0.7100179349218549, "grad_norm": 0.3803635835647583, "learning_rate": 2.1293730926403673e-05, "loss": 2.7287, "step": 8660 }, { "epoch": 0.7101819113502434, "grad_norm": 0.38908708095550537, "learning_rate": 2.1271535841604913e-05, "loss": 2.6782, "step": 8662 }, { "epoch": 0.7103458877786318, "grad_norm": 0.36467596888542175, "learning_rate": 2.1249349204343484e-05, "loss": 2.7013, "step": 8664 }, { "epoch": 0.7105098642070202, "grad_norm": 0.3772580623626709, "learning_rate": 2.1227171021143372e-05, "loss": 2.6869, "step": 8666 }, { "epoch": 0.7106738406354086, "grad_norm": 0.35389816761016846, "learning_rate": 2.120500129852597e-05, "loss": 2.7237, "step": 8668 }, { "epoch": 0.7108378170637971, "grad_norm": 0.37267571687698364, "learning_rate": 2.118284004301026e-05, "loss": 2.6979, "step": 8670 }, { "epoch": 0.7110017934921855, "grad_norm": 0.35404422879219055, "learning_rate": 2.1160687261112722e-05, "loss": 2.713, "step": 8672 }, { "epoch": 0.7111657699205739, "grad_norm": 0.3572290539741516, "learning_rate": 2.1138542959347262e-05, "loss": 2.7269, "step": 8674 }, { "epoch": 0.7113297463489623, "grad_norm": 0.4067935645580292, "learning_rate": 2.1116407144225448e-05, "loss": 2.7234, "step": 8676 }, { "epoch": 0.7114937227773508, "grad_norm": 0.3761785328388214, "learning_rate": 2.1094279822256195e-05, "loss": 2.6987, "step": 8678 }, { "epoch": 0.7116576992057392, "grad_norm": 0.3632296919822693, "learning_rate": 2.107216099994603e-05, "loss": 2.721, "step": 8680 }, { "epoch": 0.7118216756341276, "grad_norm": 0.41475990414619446, "learning_rate": 2.1050050683798917e-05, "loss": 2.7017, "step": 8682 }, { "epoch": 0.711985652062516, "grad_norm": 0.3945857584476471, "learning_rate": 2.1027948880316372e-05, "loss": 2.707, "step": 8684 }, { "epoch": 0.7121496284909045, "grad_norm": 0.37521371245384216, "learning_rate": 2.1005855595997327e-05, "loss": 2.6778, "step": 8686 }, { "epoch": 0.7123136049192929, "grad_norm": 0.36565887928009033, "learning_rate": 2.0983770837338325e-05, "loss": 2.6422, "step": 8688 }, { "epoch": 0.7124775813476812, "grad_norm": 0.36480435729026794, "learning_rate": 2.0961694610833287e-05, "loss": 2.6999, "step": 8690 }, { "epoch": 0.7126415577760697, "grad_norm": 0.3911430835723877, "learning_rate": 2.09396269229737e-05, "loss": 2.6891, "step": 8692 }, { "epoch": 0.7128055342044581, "grad_norm": 0.3548656105995178, "learning_rate": 2.091756778024852e-05, "loss": 2.6749, "step": 8694 }, { "epoch": 0.7129695106328465, "grad_norm": 0.3688564598560333, "learning_rate": 2.0895517189144165e-05, "loss": 2.7075, "step": 8696 }, { "epoch": 0.7131334870612349, "grad_norm": 0.36516255140304565, "learning_rate": 2.087347515614456e-05, "loss": 2.6884, "step": 8698 }, { "epoch": 0.7132974634896234, "grad_norm": 0.37855157256126404, "learning_rate": 2.0851441687731133e-05, "loss": 2.6683, "step": 8700 }, { "epoch": 0.7134614399180118, "grad_norm": 0.3581778109073639, "learning_rate": 2.0829416790382772e-05, "loss": 2.6664, "step": 8702 }, { "epoch": 0.7136254163464002, "grad_norm": 0.35958313941955566, "learning_rate": 2.080740047057581e-05, "loss": 2.6719, "step": 8704 }, { "epoch": 0.7137893927747886, "grad_norm": 0.37104856967926025, "learning_rate": 2.0785392734784155e-05, "loss": 2.7047, "step": 8706 }, { "epoch": 0.7139533692031771, "grad_norm": 0.36019831895828247, "learning_rate": 2.0763393589479074e-05, "loss": 2.7314, "step": 8708 }, { "epoch": 0.7141173456315655, "grad_norm": 0.3699694871902466, "learning_rate": 2.074140304112939e-05, "loss": 2.6927, "step": 8710 }, { "epoch": 0.7142813220599539, "grad_norm": 0.3692975342273712, "learning_rate": 2.0719421096201368e-05, "loss": 2.6529, "step": 8712 }, { "epoch": 0.7144452984883423, "grad_norm": 0.35241538286209106, "learning_rate": 2.0697447761158773e-05, "loss": 2.6626, "step": 8714 }, { "epoch": 0.7146092749167308, "grad_norm": 0.34361711144447327, "learning_rate": 2.0675483042462764e-05, "loss": 2.676, "step": 8716 }, { "epoch": 0.7147732513451192, "grad_norm": 0.36636024713516235, "learning_rate": 2.0653526946572037e-05, "loss": 2.6651, "step": 8718 }, { "epoch": 0.7149372277735075, "grad_norm": 0.3608117401599884, "learning_rate": 2.0631579479942752e-05, "loss": 2.6777, "step": 8720 }, { "epoch": 0.7151012042018959, "grad_norm": 0.37384918332099915, "learning_rate": 2.060964064902845e-05, "loss": 2.7356, "step": 8722 }, { "epoch": 0.7152651806302844, "grad_norm": 0.3529508709907532, "learning_rate": 2.0587710460280275e-05, "loss": 2.7252, "step": 8724 }, { "epoch": 0.7154291570586728, "grad_norm": 0.3796766698360443, "learning_rate": 2.0565788920146683e-05, "loss": 2.6545, "step": 8726 }, { "epoch": 0.7155931334870612, "grad_norm": 0.3741827607154846, "learning_rate": 2.0543876035073672e-05, "loss": 2.6834, "step": 8728 }, { "epoch": 0.7157571099154496, "grad_norm": 0.38752180337905884, "learning_rate": 2.052197181150468e-05, "loss": 2.696, "step": 8730 }, { "epoch": 0.7159210863438381, "grad_norm": 0.360067754983902, "learning_rate": 2.0500076255880608e-05, "loss": 2.6315, "step": 8732 }, { "epoch": 0.7160850627722265, "grad_norm": 0.3775290250778198, "learning_rate": 2.0478189374639752e-05, "loss": 2.6661, "step": 8734 }, { "epoch": 0.7162490392006149, "grad_norm": 0.379972368478775, "learning_rate": 2.0456311174217912e-05, "loss": 2.6948, "step": 8736 }, { "epoch": 0.7164130156290033, "grad_norm": 0.3910143971443176, "learning_rate": 2.0434441661048338e-05, "loss": 2.7446, "step": 8738 }, { "epoch": 0.7165769920573918, "grad_norm": 0.3933928906917572, "learning_rate": 2.041258084156169e-05, "loss": 2.7033, "step": 8740 }, { "epoch": 0.7167409684857802, "grad_norm": 0.3594576418399811, "learning_rate": 2.0390728722186126e-05, "loss": 2.6679, "step": 8742 }, { "epoch": 0.7169049449141686, "grad_norm": 0.35524871945381165, "learning_rate": 2.0368885309347162e-05, "loss": 2.6864, "step": 8744 }, { "epoch": 0.717068921342557, "grad_norm": 0.3604062497615814, "learning_rate": 2.0347050609467822e-05, "loss": 2.7033, "step": 8746 }, { "epoch": 0.7172328977709455, "grad_norm": 0.37317147850990295, "learning_rate": 2.0325224628968552e-05, "loss": 2.6738, "step": 8748 }, { "epoch": 0.7173968741993338, "grad_norm": 0.3843664526939392, "learning_rate": 2.0303407374267253e-05, "loss": 2.6998, "step": 8750 }, { "epoch": 0.7175608506277222, "grad_norm": 0.3630846440792084, "learning_rate": 2.028159885177917e-05, "loss": 2.6581, "step": 8752 }, { "epoch": 0.7177248270561107, "grad_norm": 0.38013482093811035, "learning_rate": 2.025979906791713e-05, "loss": 2.6389, "step": 8754 }, { "epoch": 0.7178888034844991, "grad_norm": 0.3656391203403473, "learning_rate": 2.023800802909125e-05, "loss": 2.662, "step": 8756 }, { "epoch": 0.7180527799128875, "grad_norm": 0.377647340297699, "learning_rate": 2.0216225741709154e-05, "loss": 2.7053, "step": 8758 }, { "epoch": 0.7182167563412759, "grad_norm": 0.383730947971344, "learning_rate": 2.0194452212175885e-05, "loss": 2.6918, "step": 8760 }, { "epoch": 0.7183807327696644, "grad_norm": 0.391690731048584, "learning_rate": 2.0172687446893873e-05, "loss": 2.7823, "step": 8762 }, { "epoch": 0.7185447091980528, "grad_norm": 0.3921140432357788, "learning_rate": 2.0150931452263005e-05, "loss": 2.6591, "step": 8764 }, { "epoch": 0.7187086856264412, "grad_norm": 0.34550994634628296, "learning_rate": 2.012918423468058e-05, "loss": 2.6826, "step": 8766 }, { "epoch": 0.7188726620548296, "grad_norm": 0.38729730248451233, "learning_rate": 2.010744580054134e-05, "loss": 2.7353, "step": 8768 }, { "epoch": 0.7190366384832181, "grad_norm": 0.3607577383518219, "learning_rate": 2.0085716156237362e-05, "loss": 2.7071, "step": 8770 }, { "epoch": 0.7192006149116065, "grad_norm": 0.39334240555763245, "learning_rate": 2.0063995308158273e-05, "loss": 2.6816, "step": 8772 }, { "epoch": 0.7193645913399949, "grad_norm": 0.38497117161750793, "learning_rate": 2.0042283262690975e-05, "loss": 2.6765, "step": 8774 }, { "epoch": 0.7195285677683833, "grad_norm": 0.35287126898765564, "learning_rate": 2.002058002621987e-05, "loss": 2.68, "step": 8776 }, { "epoch": 0.7196925441967718, "grad_norm": 0.38193613290786743, "learning_rate": 1.9998885605126743e-05, "loss": 2.7159, "step": 8778 }, { "epoch": 0.7198565206251601, "grad_norm": 0.36058685183525085, "learning_rate": 1.9977200005790804e-05, "loss": 2.7078, "step": 8780 }, { "epoch": 0.7200204970535485, "grad_norm": 0.3733639419078827, "learning_rate": 1.9955523234588613e-05, "loss": 2.7121, "step": 8782 }, { "epoch": 0.7201844734819369, "grad_norm": 0.3472130298614502, "learning_rate": 1.9933855297894196e-05, "loss": 2.6841, "step": 8784 }, { "epoch": 0.7203484499103254, "grad_norm": 0.37057337164878845, "learning_rate": 1.9912196202078976e-05, "loss": 2.6948, "step": 8786 }, { "epoch": 0.7205124263387138, "grad_norm": 0.34663885831832886, "learning_rate": 1.9890545953511702e-05, "loss": 2.6712, "step": 8788 }, { "epoch": 0.7206764027671022, "grad_norm": 0.3575139045715332, "learning_rate": 1.9868904558558655e-05, "loss": 2.6965, "step": 8790 }, { "epoch": 0.7208403791954906, "grad_norm": 0.394976407289505, "learning_rate": 1.9847272023583378e-05, "loss": 2.7299, "step": 8792 }, { "epoch": 0.7210043556238791, "grad_norm": 0.39025330543518066, "learning_rate": 1.9825648354946897e-05, "loss": 2.6544, "step": 8794 }, { "epoch": 0.7211683320522675, "grad_norm": 0.3627324402332306, "learning_rate": 1.9804033559007585e-05, "loss": 2.6766, "step": 8796 }, { "epoch": 0.7213323084806559, "grad_norm": 0.3690572679042816, "learning_rate": 1.978242764212125e-05, "loss": 2.618, "step": 8798 }, { "epoch": 0.7214962849090443, "grad_norm": 0.38577893376350403, "learning_rate": 1.9760830610640994e-05, "loss": 2.6724, "step": 8800 }, { "epoch": 0.7216602613374328, "grad_norm": 0.3648007810115814, "learning_rate": 1.9739242470917468e-05, "loss": 2.6598, "step": 8802 }, { "epoch": 0.7218242377658212, "grad_norm": 0.3590453863143921, "learning_rate": 1.971766322929854e-05, "loss": 2.6617, "step": 8804 }, { "epoch": 0.7219882141942096, "grad_norm": 0.37548232078552246, "learning_rate": 1.9696092892129565e-05, "loss": 2.6536, "step": 8806 }, { "epoch": 0.7221521906225981, "grad_norm": 0.4404735565185547, "learning_rate": 1.9674531465753254e-05, "loss": 2.6849, "step": 8808 }, { "epoch": 0.7223161670509864, "grad_norm": 0.3744325041770935, "learning_rate": 1.9652978956509675e-05, "loss": 2.7112, "step": 8810 }, { "epoch": 0.7224801434793748, "grad_norm": 0.35116636753082275, "learning_rate": 1.96314353707363e-05, "loss": 2.6774, "step": 8812 }, { "epoch": 0.7226441199077632, "grad_norm": 0.40838176012039185, "learning_rate": 1.9609900714767977e-05, "loss": 2.7045, "step": 8814 }, { "epoch": 0.7228080963361517, "grad_norm": 0.3593634068965912, "learning_rate": 1.9588374994936927e-05, "loss": 2.6728, "step": 8816 }, { "epoch": 0.7229720727645401, "grad_norm": 0.3494911193847656, "learning_rate": 1.9566858217572694e-05, "loss": 2.6689, "step": 8818 }, { "epoch": 0.7231360491929285, "grad_norm": 0.3753405511379242, "learning_rate": 1.9545350389002304e-05, "loss": 2.6964, "step": 8820 }, { "epoch": 0.7233000256213169, "grad_norm": 0.36216264963150024, "learning_rate": 1.952385151555003e-05, "loss": 2.7152, "step": 8822 }, { "epoch": 0.7234640020497054, "grad_norm": 0.3647003471851349, "learning_rate": 1.9502361603537583e-05, "loss": 2.7221, "step": 8824 }, { "epoch": 0.7236279784780938, "grad_norm": 0.36026209592819214, "learning_rate": 1.9480880659284022e-05, "loss": 2.6614, "step": 8826 }, { "epoch": 0.7237919549064822, "grad_norm": 0.36493557691574097, "learning_rate": 1.9459408689105786e-05, "loss": 2.7036, "step": 8828 }, { "epoch": 0.7239559313348706, "grad_norm": 0.3542688190937042, "learning_rate": 1.9437945699316617e-05, "loss": 2.6828, "step": 8830 }, { "epoch": 0.7241199077632591, "grad_norm": 0.3724217116832733, "learning_rate": 1.9416491696227677e-05, "loss": 2.7026, "step": 8832 }, { "epoch": 0.7242838841916475, "grad_norm": 0.37069764733314514, "learning_rate": 1.9395046686147488e-05, "loss": 2.6942, "step": 8834 }, { "epoch": 0.7244478606200359, "grad_norm": 0.3715781271457672, "learning_rate": 1.9373610675381847e-05, "loss": 2.6688, "step": 8836 }, { "epoch": 0.7246118370484242, "grad_norm": 0.36626240611076355, "learning_rate": 1.9352183670234038e-05, "loss": 2.7179, "step": 8838 }, { "epoch": 0.7247758134768127, "grad_norm": 0.38260751962661743, "learning_rate": 1.9330765677004563e-05, "loss": 2.6749, "step": 8840 }, { "epoch": 0.7249397899052011, "grad_norm": 0.367906391620636, "learning_rate": 1.930935670199135e-05, "loss": 2.686, "step": 8842 }, { "epoch": 0.7251037663335895, "grad_norm": 0.3517436385154724, "learning_rate": 1.9287956751489662e-05, "loss": 2.6911, "step": 8844 }, { "epoch": 0.7252677427619779, "grad_norm": 0.363772451877594, "learning_rate": 1.9266565831792114e-05, "loss": 2.6554, "step": 8846 }, { "epoch": 0.7254317191903664, "grad_norm": 0.3705129325389862, "learning_rate": 1.9245183949188634e-05, "loss": 2.6706, "step": 8848 }, { "epoch": 0.7255956956187548, "grad_norm": 0.3884037137031555, "learning_rate": 1.922381110996652e-05, "loss": 2.6553, "step": 8850 }, { "epoch": 0.7257596720471432, "grad_norm": 0.3646256625652313, "learning_rate": 1.9202447320410412e-05, "loss": 2.7096, "step": 8852 }, { "epoch": 0.7259236484755316, "grad_norm": 0.3645271360874176, "learning_rate": 1.9181092586802274e-05, "loss": 2.6916, "step": 8854 }, { "epoch": 0.7260876249039201, "grad_norm": 0.36142271757125854, "learning_rate": 1.915974691542144e-05, "loss": 2.6765, "step": 8856 }, { "epoch": 0.7262516013323085, "grad_norm": 0.36564940214157104, "learning_rate": 1.913841031254452e-05, "loss": 2.6845, "step": 8858 }, { "epoch": 0.7264155777606969, "grad_norm": 0.3475258946418762, "learning_rate": 1.91170827844455e-05, "loss": 2.6721, "step": 8860 }, { "epoch": 0.7265795541890853, "grad_norm": 0.32706737518310547, "learning_rate": 1.9095764337395706e-05, "loss": 2.621, "step": 8862 }, { "epoch": 0.7267435306174738, "grad_norm": 0.36696481704711914, "learning_rate": 1.9074454977663774e-05, "loss": 2.7096, "step": 8864 }, { "epoch": 0.7269075070458622, "grad_norm": 0.3692162036895752, "learning_rate": 1.9053154711515638e-05, "loss": 2.649, "step": 8866 }, { "epoch": 0.7270714834742505, "grad_norm": 0.3444785177707672, "learning_rate": 1.9031863545214653e-05, "loss": 2.7086, "step": 8868 }, { "epoch": 0.727235459902639, "grad_norm": 0.35217925906181335, "learning_rate": 1.9010581485021378e-05, "loss": 2.7041, "step": 8870 }, { "epoch": 0.7273994363310274, "grad_norm": 0.3627830147743225, "learning_rate": 1.8989308537193777e-05, "loss": 2.6864, "step": 8872 }, { "epoch": 0.7275634127594158, "grad_norm": 0.3672342300415039, "learning_rate": 1.896804470798713e-05, "loss": 2.6632, "step": 8874 }, { "epoch": 0.7277273891878042, "grad_norm": 0.35386422276496887, "learning_rate": 1.894679000365398e-05, "loss": 2.6621, "step": 8876 }, { "epoch": 0.7278913656161927, "grad_norm": 0.36656153202056885, "learning_rate": 1.8925544430444235e-05, "loss": 2.6506, "step": 8878 }, { "epoch": 0.7280553420445811, "grad_norm": 0.37069380283355713, "learning_rate": 1.8904307994605108e-05, "loss": 2.7138, "step": 8880 }, { "epoch": 0.7282193184729695, "grad_norm": 0.364506334066391, "learning_rate": 1.8883080702381144e-05, "loss": 2.7046, "step": 8882 }, { "epoch": 0.7283832949013579, "grad_norm": 0.3649429678916931, "learning_rate": 1.8861862560014128e-05, "loss": 2.6496, "step": 8884 }, { "epoch": 0.7285472713297464, "grad_norm": 0.3707544207572937, "learning_rate": 1.884065357374326e-05, "loss": 2.7451, "step": 8886 }, { "epoch": 0.7287112477581348, "grad_norm": 0.37504974007606506, "learning_rate": 1.8819453749804956e-05, "loss": 2.6887, "step": 8888 }, { "epoch": 0.7288752241865232, "grad_norm": 0.38775527477264404, "learning_rate": 1.8798263094432987e-05, "loss": 2.6669, "step": 8890 }, { "epoch": 0.7290392006149116, "grad_norm": 0.3648729920387268, "learning_rate": 1.8777081613858416e-05, "loss": 2.6157, "step": 8892 }, { "epoch": 0.7292031770433001, "grad_norm": 0.34821397066116333, "learning_rate": 1.8755909314309616e-05, "loss": 2.6994, "step": 8894 }, { "epoch": 0.7293671534716885, "grad_norm": 0.3641124963760376, "learning_rate": 1.8734746202012233e-05, "loss": 2.7311, "step": 8896 }, { "epoch": 0.7295311299000768, "grad_norm": 0.3881944715976715, "learning_rate": 1.8713592283189235e-05, "loss": 2.7025, "step": 8898 }, { "epoch": 0.7296951063284652, "grad_norm": 0.37551623582839966, "learning_rate": 1.869244756406091e-05, "loss": 2.7076, "step": 8900 }, { "epoch": 0.7298590827568537, "grad_norm": 0.38129666447639465, "learning_rate": 1.8671312050844753e-05, "loss": 2.7015, "step": 8902 }, { "epoch": 0.7300230591852421, "grad_norm": 0.35345518589019775, "learning_rate": 1.8650185749755678e-05, "loss": 2.6657, "step": 8904 }, { "epoch": 0.7301870356136305, "grad_norm": 0.3613397777080536, "learning_rate": 1.8629068667005784e-05, "loss": 2.6862, "step": 8906 }, { "epoch": 0.7303510120420189, "grad_norm": 0.38645192980766296, "learning_rate": 1.8607960808804503e-05, "loss": 2.6648, "step": 8908 }, { "epoch": 0.7305149884704074, "grad_norm": 0.3691881597042084, "learning_rate": 1.8586862181358566e-05, "loss": 2.6725, "step": 8910 }, { "epoch": 0.7306789648987958, "grad_norm": 0.3389580249786377, "learning_rate": 1.8565772790871984e-05, "loss": 2.6795, "step": 8912 }, { "epoch": 0.7308429413271842, "grad_norm": 0.3590739667415619, "learning_rate": 1.8544692643545984e-05, "loss": 2.6866, "step": 8914 }, { "epoch": 0.7310069177555726, "grad_norm": 0.356611967086792, "learning_rate": 1.8523621745579218e-05, "loss": 2.7193, "step": 8916 }, { "epoch": 0.7311708941839611, "grad_norm": 0.3655679225921631, "learning_rate": 1.850256010316747e-05, "loss": 2.6451, "step": 8918 }, { "epoch": 0.7313348706123495, "grad_norm": 0.3675282895565033, "learning_rate": 1.8481507722503893e-05, "loss": 2.6803, "step": 8920 }, { "epoch": 0.7314988470407379, "grad_norm": 0.368619829416275, "learning_rate": 1.84604646097789e-05, "loss": 2.7248, "step": 8922 }, { "epoch": 0.7316628234691264, "grad_norm": 0.38850507140159607, "learning_rate": 1.8439430771180138e-05, "loss": 2.6808, "step": 8924 }, { "epoch": 0.7318267998975148, "grad_norm": 0.35582661628723145, "learning_rate": 1.8418406212892576e-05, "loss": 2.6323, "step": 8926 }, { "epoch": 0.7319907763259031, "grad_norm": 0.380453884601593, "learning_rate": 1.839739094109843e-05, "loss": 2.6512, "step": 8928 }, { "epoch": 0.7321547527542915, "grad_norm": 0.3536703586578369, "learning_rate": 1.837638496197721e-05, "loss": 2.7053, "step": 8930 }, { "epoch": 0.73231872918268, "grad_norm": 0.35601118206977844, "learning_rate": 1.8355388281705632e-05, "loss": 2.7351, "step": 8932 }, { "epoch": 0.7324827056110684, "grad_norm": 0.35562974214553833, "learning_rate": 1.8334400906457775e-05, "loss": 2.6014, "step": 8934 }, { "epoch": 0.7326466820394568, "grad_norm": 0.36037373542785645, "learning_rate": 1.831342284240489e-05, "loss": 2.6584, "step": 8936 }, { "epoch": 0.7328106584678452, "grad_norm": 0.3486712872982025, "learning_rate": 1.829245409571554e-05, "loss": 2.7057, "step": 8938 }, { "epoch": 0.7329746348962337, "grad_norm": 0.35479119420051575, "learning_rate": 1.8271494672555527e-05, "loss": 2.6537, "step": 8940 }, { "epoch": 0.7331386113246221, "grad_norm": 0.35784339904785156, "learning_rate": 1.8250544579087953e-05, "loss": 2.6107, "step": 8942 }, { "epoch": 0.7333025877530105, "grad_norm": 0.353708416223526, "learning_rate": 1.8229603821473095e-05, "loss": 2.6633, "step": 8944 }, { "epoch": 0.7334665641813989, "grad_norm": 0.3418165445327759, "learning_rate": 1.8208672405868553e-05, "loss": 2.6811, "step": 8946 }, { "epoch": 0.7336305406097874, "grad_norm": 0.3424532413482666, "learning_rate": 1.8187750338429187e-05, "loss": 2.6729, "step": 8948 }, { "epoch": 0.7337945170381758, "grad_norm": 0.3533565402030945, "learning_rate": 1.816683762530702e-05, "loss": 2.6634, "step": 8950 }, { "epoch": 0.7339584934665642, "grad_norm": 0.330537348985672, "learning_rate": 1.8145934272651467e-05, "loss": 2.6244, "step": 8952 }, { "epoch": 0.7341224698949526, "grad_norm": 0.34000977873802185, "learning_rate": 1.812504028660904e-05, "loss": 2.7216, "step": 8954 }, { "epoch": 0.7342864463233411, "grad_norm": 0.3350144624710083, "learning_rate": 1.8104155673323602e-05, "loss": 2.664, "step": 8956 }, { "epoch": 0.7344504227517294, "grad_norm": 0.35844945907592773, "learning_rate": 1.8083280438936213e-05, "loss": 2.6992, "step": 8958 }, { "epoch": 0.7346143991801178, "grad_norm": 0.3489479422569275, "learning_rate": 1.8062414589585208e-05, "loss": 2.7228, "step": 8960 }, { "epoch": 0.7347783756085062, "grad_norm": 0.35961857438087463, "learning_rate": 1.8041558131406088e-05, "loss": 2.7139, "step": 8962 }, { "epoch": 0.7349423520368947, "grad_norm": 0.36380940675735474, "learning_rate": 1.8020711070531714e-05, "loss": 2.5657, "step": 8964 }, { "epoch": 0.7351063284652831, "grad_norm": 0.34669262170791626, "learning_rate": 1.7999873413092066e-05, "loss": 2.7073, "step": 8966 }, { "epoch": 0.7352703048936715, "grad_norm": 0.36911582946777344, "learning_rate": 1.7979045165214425e-05, "loss": 2.7098, "step": 8968 }, { "epoch": 0.7354342813220599, "grad_norm": 0.3712131381034851, "learning_rate": 1.7958226333023297e-05, "loss": 2.6947, "step": 8970 }, { "epoch": 0.7355982577504484, "grad_norm": 0.36578479409217834, "learning_rate": 1.7937416922640395e-05, "loss": 2.7744, "step": 8972 }, { "epoch": 0.7357622341788368, "grad_norm": 0.3710160553455353, "learning_rate": 1.791661694018468e-05, "loss": 2.6436, "step": 8974 }, { "epoch": 0.7359262106072252, "grad_norm": 0.3627588748931885, "learning_rate": 1.7895826391772335e-05, "loss": 2.7135, "step": 8976 }, { "epoch": 0.7360901870356137, "grad_norm": 0.3674503266811371, "learning_rate": 1.7875045283516802e-05, "loss": 2.6796, "step": 8978 }, { "epoch": 0.7362541634640021, "grad_norm": 0.3505978584289551, "learning_rate": 1.7854273621528656e-05, "loss": 2.673, "step": 8980 }, { "epoch": 0.7364181398923905, "grad_norm": 0.36206257343292236, "learning_rate": 1.783351141191582e-05, "loss": 2.6763, "step": 8982 }, { "epoch": 0.7365821163207789, "grad_norm": 0.3386150598526001, "learning_rate": 1.7812758660783336e-05, "loss": 2.6658, "step": 8984 }, { "epoch": 0.7367460927491674, "grad_norm": 0.36186760663986206, "learning_rate": 1.779201537423351e-05, "loss": 2.6366, "step": 8986 }, { "epoch": 0.7369100691775557, "grad_norm": 0.3517228960990906, "learning_rate": 1.777128155836586e-05, "loss": 2.6988, "step": 8988 }, { "epoch": 0.7370740456059441, "grad_norm": 0.37019091844558716, "learning_rate": 1.7750557219277126e-05, "loss": 2.6978, "step": 8990 }, { "epoch": 0.7372380220343325, "grad_norm": 0.35576122999191284, "learning_rate": 1.772984236306122e-05, "loss": 2.6686, "step": 8992 }, { "epoch": 0.737401998462721, "grad_norm": 0.3716403841972351, "learning_rate": 1.7709136995809322e-05, "loss": 2.6938, "step": 8994 }, { "epoch": 0.7375659748911094, "grad_norm": 0.37883302569389343, "learning_rate": 1.7688441123609805e-05, "loss": 2.7095, "step": 8996 }, { "epoch": 0.7377299513194978, "grad_norm": 0.3864205479621887, "learning_rate": 1.7667754752548192e-05, "loss": 2.6649, "step": 8998 }, { "epoch": 0.7378939277478862, "grad_norm": 0.36532631516456604, "learning_rate": 1.764707788870733e-05, "loss": 2.6308, "step": 9000 }, { "epoch": 0.7380579041762747, "grad_norm": 0.3471086323261261, "learning_rate": 1.7626410538167155e-05, "loss": 2.7433, "step": 9002 }, { "epoch": 0.7382218806046631, "grad_norm": 0.35020241141319275, "learning_rate": 1.7605752707004864e-05, "loss": 2.6671, "step": 9004 }, { "epoch": 0.7383858570330515, "grad_norm": 0.34402596950531006, "learning_rate": 1.7585104401294845e-05, "loss": 2.6795, "step": 9006 }, { "epoch": 0.7385498334614399, "grad_norm": 0.3468019366264343, "learning_rate": 1.75644656271087e-05, "loss": 2.6707, "step": 9008 }, { "epoch": 0.7387138098898284, "grad_norm": 0.372406005859375, "learning_rate": 1.7543836390515184e-05, "loss": 2.7445, "step": 9010 }, { "epoch": 0.7388777863182168, "grad_norm": 0.36230936646461487, "learning_rate": 1.7523216697580285e-05, "loss": 2.7079, "step": 9012 }, { "epoch": 0.7390417627466052, "grad_norm": 0.3625590205192566, "learning_rate": 1.7502606554367178e-05, "loss": 2.6949, "step": 9014 }, { "epoch": 0.7392057391749935, "grad_norm": 0.35242268443107605, "learning_rate": 1.748200596693623e-05, "loss": 2.6976, "step": 9016 }, { "epoch": 0.739369715603382, "grad_norm": 0.3472610414028168, "learning_rate": 1.7461414941345018e-05, "loss": 2.7097, "step": 9018 }, { "epoch": 0.7395336920317704, "grad_norm": 0.368011474609375, "learning_rate": 1.7440833483648235e-05, "loss": 2.6312, "step": 9020 }, { "epoch": 0.7396976684601588, "grad_norm": 0.36684247851371765, "learning_rate": 1.7420261599897837e-05, "loss": 2.6817, "step": 9022 }, { "epoch": 0.7398616448885472, "grad_norm": 0.36508727073669434, "learning_rate": 1.7399699296142928e-05, "loss": 2.6817, "step": 9024 }, { "epoch": 0.7400256213169357, "grad_norm": 0.36940640211105347, "learning_rate": 1.7379146578429838e-05, "loss": 2.7134, "step": 9026 }, { "epoch": 0.7401895977453241, "grad_norm": 0.3538048267364502, "learning_rate": 1.7358603452801992e-05, "loss": 2.6803, "step": 9028 }, { "epoch": 0.7403535741737125, "grad_norm": 0.35103994607925415, "learning_rate": 1.73380699253001e-05, "loss": 2.7089, "step": 9030 }, { "epoch": 0.7405175506021009, "grad_norm": 0.3555721044540405, "learning_rate": 1.7317546001961966e-05, "loss": 2.7343, "step": 9032 }, { "epoch": 0.7406815270304894, "grad_norm": 0.34971192479133606, "learning_rate": 1.72970316888226e-05, "loss": 2.6545, "step": 9034 }, { "epoch": 0.7408455034588778, "grad_norm": 0.3655741214752197, "learning_rate": 1.727652699191422e-05, "loss": 2.6784, "step": 9036 }, { "epoch": 0.7410094798872662, "grad_norm": 0.34694811701774597, "learning_rate": 1.725603191726614e-05, "loss": 2.6506, "step": 9038 }, { "epoch": 0.7411734563156547, "grad_norm": 0.3528430163860321, "learning_rate": 1.723554647090491e-05, "loss": 2.6951, "step": 9040 }, { "epoch": 0.7413374327440431, "grad_norm": 0.3649402856826782, "learning_rate": 1.7215070658854233e-05, "loss": 2.6766, "step": 9042 }, { "epoch": 0.7415014091724315, "grad_norm": 0.351094514131546, "learning_rate": 1.719460448713498e-05, "loss": 2.6916, "step": 9044 }, { "epoch": 0.7416653856008198, "grad_norm": 0.35957562923431396, "learning_rate": 1.7174147961765136e-05, "loss": 2.6765, "step": 9046 }, { "epoch": 0.7418293620292084, "grad_norm": 0.3619736135005951, "learning_rate": 1.7153701088759953e-05, "loss": 2.6762, "step": 9048 }, { "epoch": 0.7419933384575967, "grad_norm": 0.36906012892723083, "learning_rate": 1.713326387413175e-05, "loss": 2.6537, "step": 9050 }, { "epoch": 0.7421573148859851, "grad_norm": 0.3386414051055908, "learning_rate": 1.7112836323890046e-05, "loss": 2.7311, "step": 9052 }, { "epoch": 0.7423212913143735, "grad_norm": 0.3602679371833801, "learning_rate": 1.709241844404152e-05, "loss": 2.6328, "step": 9054 }, { "epoch": 0.742485267742762, "grad_norm": 0.35406795144081116, "learning_rate": 1.707201024059002e-05, "loss": 2.6582, "step": 9056 }, { "epoch": 0.7426492441711504, "grad_norm": 0.3504626452922821, "learning_rate": 1.7051611719536492e-05, "loss": 2.678, "step": 9058 }, { "epoch": 0.7428132205995388, "grad_norm": 0.35680869221687317, "learning_rate": 1.703122288687909e-05, "loss": 2.6863, "step": 9060 }, { "epoch": 0.7429771970279272, "grad_norm": 0.3517512083053589, "learning_rate": 1.7010843748613127e-05, "loss": 2.6521, "step": 9062 }, { "epoch": 0.7431411734563157, "grad_norm": 0.3569678068161011, "learning_rate": 1.6990474310730975e-05, "loss": 2.7297, "step": 9064 }, { "epoch": 0.7433051498847041, "grad_norm": 0.3471764624118805, "learning_rate": 1.6970114579222302e-05, "loss": 2.6964, "step": 9066 }, { "epoch": 0.7434691263130925, "grad_norm": 0.33761879801750183, "learning_rate": 1.6949764560073783e-05, "loss": 2.6787, "step": 9068 }, { "epoch": 0.7436331027414809, "grad_norm": 0.3718072474002838, "learning_rate": 1.692942425926931e-05, "loss": 2.7235, "step": 9070 }, { "epoch": 0.7437970791698694, "grad_norm": 0.3575357496738434, "learning_rate": 1.69090936827899e-05, "loss": 2.6705, "step": 9072 }, { "epoch": 0.7439610555982578, "grad_norm": 0.3540283143520355, "learning_rate": 1.6888772836613724e-05, "loss": 2.6244, "step": 9074 }, { "epoch": 0.7441250320266461, "grad_norm": 0.36485904455184937, "learning_rate": 1.686846172671604e-05, "loss": 2.6874, "step": 9076 }, { "epoch": 0.7442890084550345, "grad_norm": 0.34155014157295227, "learning_rate": 1.684816035906934e-05, "loss": 2.6234, "step": 9078 }, { "epoch": 0.744452984883423, "grad_norm": 0.3520983159542084, "learning_rate": 1.682786873964315e-05, "loss": 2.7418, "step": 9080 }, { "epoch": 0.7446169613118114, "grad_norm": 0.38358286023139954, "learning_rate": 1.680758687440418e-05, "loss": 2.6917, "step": 9082 }, { "epoch": 0.7447809377401998, "grad_norm": 0.36411455273628235, "learning_rate": 1.6787314769316288e-05, "loss": 2.7203, "step": 9084 }, { "epoch": 0.7449449141685882, "grad_norm": 0.3393744230270386, "learning_rate": 1.67670524303404e-05, "loss": 2.679, "step": 9086 }, { "epoch": 0.7451088905969767, "grad_norm": 0.37584084272384644, "learning_rate": 1.6746799863434637e-05, "loss": 2.7042, "step": 9088 }, { "epoch": 0.7452728670253651, "grad_norm": 0.3599667251110077, "learning_rate": 1.6726557074554206e-05, "loss": 2.6857, "step": 9090 }, { "epoch": 0.7454368434537535, "grad_norm": 0.3916679918766022, "learning_rate": 1.670632406965148e-05, "loss": 2.705, "step": 9092 }, { "epoch": 0.745600819882142, "grad_norm": 0.3436645567417145, "learning_rate": 1.6686100854675867e-05, "loss": 2.6771, "step": 9094 }, { "epoch": 0.7457647963105304, "grad_norm": 0.3559042811393738, "learning_rate": 1.6665887435574025e-05, "loss": 2.6808, "step": 9096 }, { "epoch": 0.7459287727389188, "grad_norm": 0.36732256412506104, "learning_rate": 1.6645683818289615e-05, "loss": 2.6252, "step": 9098 }, { "epoch": 0.7460927491673072, "grad_norm": 0.33569782972335815, "learning_rate": 1.662549000876348e-05, "loss": 2.6614, "step": 9100 }, { "epoch": 0.7462567255956957, "grad_norm": 0.3594054877758026, "learning_rate": 1.6605306012933558e-05, "loss": 2.6761, "step": 9102 }, { "epoch": 0.7464207020240841, "grad_norm": 0.35600408911705017, "learning_rate": 1.6585131836734924e-05, "loss": 2.6402, "step": 9104 }, { "epoch": 0.7465846784524724, "grad_norm": 0.35121795535087585, "learning_rate": 1.6564967486099714e-05, "loss": 2.6475, "step": 9106 }, { "epoch": 0.7467486548808608, "grad_norm": 0.34682193398475647, "learning_rate": 1.6544812966957228e-05, "loss": 2.709, "step": 9108 }, { "epoch": 0.7469126313092493, "grad_norm": 0.37402644753456116, "learning_rate": 1.652466828523387e-05, "loss": 2.6705, "step": 9110 }, { "epoch": 0.7470766077376377, "grad_norm": 0.3731457591056824, "learning_rate": 1.6504533446853083e-05, "loss": 2.7428, "step": 9112 }, { "epoch": 0.7472405841660261, "grad_norm": 0.35329052805900574, "learning_rate": 1.6484408457735534e-05, "loss": 2.6741, "step": 9114 }, { "epoch": 0.7474045605944145, "grad_norm": 0.3553178906440735, "learning_rate": 1.6464293323798885e-05, "loss": 2.6102, "step": 9116 }, { "epoch": 0.747568537022803, "grad_norm": 0.37250661849975586, "learning_rate": 1.644418805095796e-05, "loss": 2.6597, "step": 9118 }, { "epoch": 0.7477325134511914, "grad_norm": 0.3541968762874603, "learning_rate": 1.6424092645124662e-05, "loss": 2.6906, "step": 9120 }, { "epoch": 0.7478964898795798, "grad_norm": 0.3709997236728668, "learning_rate": 1.6404007112208013e-05, "loss": 2.668, "step": 9122 }, { "epoch": 0.7480604663079682, "grad_norm": 0.35433658957481384, "learning_rate": 1.63839314581141e-05, "loss": 2.6439, "step": 9124 }, { "epoch": 0.7482244427363567, "grad_norm": 0.36892589926719666, "learning_rate": 1.6363865688746117e-05, "loss": 2.6989, "step": 9126 }, { "epoch": 0.7483884191647451, "grad_norm": 0.3724561035633087, "learning_rate": 1.634380981000437e-05, "loss": 2.6799, "step": 9128 }, { "epoch": 0.7485523955931335, "grad_norm": 0.3710022568702698, "learning_rate": 1.6323763827786238e-05, "loss": 2.6565, "step": 9130 }, { "epoch": 0.7487163720215219, "grad_norm": 0.35416173934936523, "learning_rate": 1.630372774798621e-05, "loss": 2.6531, "step": 9132 }, { "epoch": 0.7488803484499104, "grad_norm": 0.3451652526855469, "learning_rate": 1.628370157649582e-05, "loss": 2.7002, "step": 9134 }, { "epoch": 0.7490443248782988, "grad_norm": 0.3550792336463928, "learning_rate": 1.626368531920373e-05, "loss": 2.6665, "step": 9136 }, { "epoch": 0.7492083013066871, "grad_norm": 0.3607080280780792, "learning_rate": 1.6243678981995675e-05, "loss": 2.6584, "step": 9138 }, { "epoch": 0.7493722777350755, "grad_norm": 0.35807549953460693, "learning_rate": 1.6223682570754482e-05, "loss": 2.7165, "step": 9140 }, { "epoch": 0.749536254163464, "grad_norm": 0.34347352385520935, "learning_rate": 1.620369609136001e-05, "loss": 2.6811, "step": 9142 }, { "epoch": 0.7497002305918524, "grad_norm": 0.3542768657207489, "learning_rate": 1.6183719549689293e-05, "loss": 2.7023, "step": 9144 }, { "epoch": 0.7498642070202408, "grad_norm": 0.3486849069595337, "learning_rate": 1.616375295161634e-05, "loss": 2.6282, "step": 9146 }, { "epoch": 0.7500281834486292, "grad_norm": 0.38032418489456177, "learning_rate": 1.6143796303012303e-05, "loss": 2.6837, "step": 9148 }, { "epoch": 0.7501921598770177, "grad_norm": 0.354727566242218, "learning_rate": 1.6123849609745395e-05, "loss": 2.66, "step": 9150 }, { "epoch": 0.7503561363054061, "grad_norm": 0.3578219413757324, "learning_rate": 1.6103912877680872e-05, "loss": 2.6621, "step": 9152 }, { "epoch": 0.7505201127337945, "grad_norm": 0.3783826231956482, "learning_rate": 1.6083986112681087e-05, "loss": 2.7165, "step": 9154 }, { "epoch": 0.750684089162183, "grad_norm": 0.3405456840991974, "learning_rate": 1.606406932060547e-05, "loss": 2.6516, "step": 9156 }, { "epoch": 0.7508480655905714, "grad_norm": 0.3643886148929596, "learning_rate": 1.6044162507310516e-05, "loss": 2.6855, "step": 9158 }, { "epoch": 0.7510120420189598, "grad_norm": 0.3696421682834625, "learning_rate": 1.602426567864973e-05, "loss": 2.7, "step": 9160 }, { "epoch": 0.7511760184473482, "grad_norm": 0.36406493186950684, "learning_rate": 1.6004378840473794e-05, "loss": 2.6249, "step": 9162 }, { "epoch": 0.7513399948757367, "grad_norm": 0.3507121503353119, "learning_rate": 1.5984501998630336e-05, "loss": 2.6873, "step": 9164 }, { "epoch": 0.751503971304125, "grad_norm": 0.3467572331428528, "learning_rate": 1.5964635158964115e-05, "loss": 2.7253, "step": 9166 }, { "epoch": 0.7516679477325134, "grad_norm": 0.3796934485435486, "learning_rate": 1.5944778327316916e-05, "loss": 2.6922, "step": 9168 }, { "epoch": 0.7518319241609018, "grad_norm": 0.32870739698410034, "learning_rate": 1.592493150952763e-05, "loss": 2.6737, "step": 9170 }, { "epoch": 0.7519959005892903, "grad_norm": 0.35150548815727234, "learning_rate": 1.590509471143212e-05, "loss": 2.6928, "step": 9172 }, { "epoch": 0.7521598770176787, "grad_norm": 0.36243966221809387, "learning_rate": 1.5885267938863362e-05, "loss": 2.7355, "step": 9174 }, { "epoch": 0.7523238534460671, "grad_norm": 0.3716107904911041, "learning_rate": 1.58654511976514e-05, "loss": 2.6545, "step": 9176 }, { "epoch": 0.7524878298744555, "grad_norm": 0.3467326760292053, "learning_rate": 1.5845644493623246e-05, "loss": 2.6709, "step": 9178 }, { "epoch": 0.752651806302844, "grad_norm": 0.3500862419605255, "learning_rate": 1.582584783260308e-05, "loss": 2.6618, "step": 9180 }, { "epoch": 0.7528157827312324, "grad_norm": 0.3605706989765167, "learning_rate": 1.580606122041202e-05, "loss": 2.7047, "step": 9182 }, { "epoch": 0.7529797591596208, "grad_norm": 0.36824387311935425, "learning_rate": 1.5786284662868284e-05, "loss": 2.6526, "step": 9184 }, { "epoch": 0.7531437355880092, "grad_norm": 0.360293984413147, "learning_rate": 1.576651816578712e-05, "loss": 2.7274, "step": 9186 }, { "epoch": 0.7533077120163977, "grad_norm": 0.3646776080131531, "learning_rate": 1.5746761734980835e-05, "loss": 2.7232, "step": 9188 }, { "epoch": 0.7534716884447861, "grad_norm": 0.36640194058418274, "learning_rate": 1.5727015376258725e-05, "loss": 2.6988, "step": 9190 }, { "epoch": 0.7536356648731745, "grad_norm": 0.345244437456131, "learning_rate": 1.5707279095427213e-05, "loss": 2.6743, "step": 9192 }, { "epoch": 0.7537996413015629, "grad_norm": 0.3678779900074005, "learning_rate": 1.568755289828966e-05, "loss": 2.6476, "step": 9194 }, { "epoch": 0.7539636177299514, "grad_norm": 0.3413792848587036, "learning_rate": 1.5667836790646528e-05, "loss": 2.6376, "step": 9196 }, { "epoch": 0.7541275941583397, "grad_norm": 0.3593304753303528, "learning_rate": 1.56481307782953e-05, "loss": 2.6772, "step": 9198 }, { "epoch": 0.7542915705867281, "grad_norm": 0.34153884649276733, "learning_rate": 1.562843486703046e-05, "loss": 2.7052, "step": 9200 }, { "epoch": 0.7544555470151165, "grad_norm": 0.3468201458454132, "learning_rate": 1.5608749062643557e-05, "loss": 2.6757, "step": 9202 }, { "epoch": 0.754619523443505, "grad_norm": 0.3404935896396637, "learning_rate": 1.5589073370923153e-05, "loss": 2.6191, "step": 9204 }, { "epoch": 0.7547834998718934, "grad_norm": 0.36897000670433044, "learning_rate": 1.5569407797654856e-05, "loss": 2.6617, "step": 9206 }, { "epoch": 0.7549474763002818, "grad_norm": 0.35236266255378723, "learning_rate": 1.5549752348621238e-05, "loss": 2.7046, "step": 9208 }, { "epoch": 0.7551114527286703, "grad_norm": 0.3443339765071869, "learning_rate": 1.5530107029601994e-05, "loss": 2.6796, "step": 9210 }, { "epoch": 0.7552754291570587, "grad_norm": 0.3471938371658325, "learning_rate": 1.5510471846373743e-05, "loss": 2.652, "step": 9212 }, { "epoch": 0.7554394055854471, "grad_norm": 0.36439797282218933, "learning_rate": 1.5490846804710174e-05, "loss": 2.6723, "step": 9214 }, { "epoch": 0.7556033820138355, "grad_norm": 0.35002216696739197, "learning_rate": 1.547123191038199e-05, "loss": 2.6929, "step": 9216 }, { "epoch": 0.755767358442224, "grad_norm": 0.3630889654159546, "learning_rate": 1.5451627169156918e-05, "loss": 2.6383, "step": 9218 }, { "epoch": 0.7559313348706124, "grad_norm": 0.3576160967350006, "learning_rate": 1.543203258679965e-05, "loss": 2.7304, "step": 9220 }, { "epoch": 0.7560953112990008, "grad_norm": 0.36874091625213623, "learning_rate": 1.5412448169071953e-05, "loss": 2.7063, "step": 9222 }, { "epoch": 0.7562592877273892, "grad_norm": 0.3595544099807739, "learning_rate": 1.5392873921732585e-05, "loss": 2.7259, "step": 9224 }, { "epoch": 0.7564232641557777, "grad_norm": 0.34343886375427246, "learning_rate": 1.5373309850537267e-05, "loss": 2.6473, "step": 9226 }, { "epoch": 0.756587240584166, "grad_norm": 0.3752729892730713, "learning_rate": 1.5353755961238825e-05, "loss": 2.6677, "step": 9228 }, { "epoch": 0.7567512170125544, "grad_norm": 0.3584292232990265, "learning_rate": 1.5334212259586995e-05, "loss": 2.6637, "step": 9230 }, { "epoch": 0.7569151934409428, "grad_norm": 0.36585915088653564, "learning_rate": 1.531467875132856e-05, "loss": 2.7071, "step": 9232 }, { "epoch": 0.7570791698693313, "grad_norm": 0.38025403022766113, "learning_rate": 1.5295155442207315e-05, "loss": 2.7071, "step": 9234 }, { "epoch": 0.7572431462977197, "grad_norm": 0.352484792470932, "learning_rate": 1.5275642337964054e-05, "loss": 2.6513, "step": 9236 }, { "epoch": 0.7574071227261081, "grad_norm": 0.3487844169139862, "learning_rate": 1.5256139444336509e-05, "loss": 2.7144, "step": 9238 }, { "epoch": 0.7575710991544965, "grad_norm": 0.3520715534687042, "learning_rate": 1.5236646767059525e-05, "loss": 2.6972, "step": 9240 }, { "epoch": 0.757735075582885, "grad_norm": 0.3567860722541809, "learning_rate": 1.5217164311864835e-05, "loss": 2.6841, "step": 9242 }, { "epoch": 0.7578990520112734, "grad_norm": 0.36982741951942444, "learning_rate": 1.5197692084481218e-05, "loss": 2.6909, "step": 9244 }, { "epoch": 0.7580630284396618, "grad_norm": 0.3679502606391907, "learning_rate": 1.517823009063446e-05, "loss": 2.7107, "step": 9246 }, { "epoch": 0.7582270048680502, "grad_norm": 0.36532971262931824, "learning_rate": 1.5158778336047275e-05, "loss": 2.6451, "step": 9248 }, { "epoch": 0.7583909812964387, "grad_norm": 0.37929171323776245, "learning_rate": 1.5139336826439426e-05, "loss": 2.7053, "step": 9250 }, { "epoch": 0.7585549577248271, "grad_norm": 0.36494672298431396, "learning_rate": 1.5119905567527648e-05, "loss": 2.6861, "step": 9252 }, { "epoch": 0.7587189341532155, "grad_norm": 0.3522074818611145, "learning_rate": 1.5100484565025664e-05, "loss": 2.6979, "step": 9254 }, { "epoch": 0.7588829105816038, "grad_norm": 0.3355129659175873, "learning_rate": 1.5081073824644126e-05, "loss": 2.6913, "step": 9256 }, { "epoch": 0.7590468870099923, "grad_norm": 0.36444979906082153, "learning_rate": 1.5061673352090782e-05, "loss": 2.6772, "step": 9258 }, { "epoch": 0.7592108634383807, "grad_norm": 0.3412935435771942, "learning_rate": 1.5042283153070258e-05, "loss": 2.6816, "step": 9260 }, { "epoch": 0.7593748398667691, "grad_norm": 0.37851637601852417, "learning_rate": 1.5022903233284196e-05, "loss": 2.7013, "step": 9262 }, { "epoch": 0.7595388162951576, "grad_norm": 0.34171062707901, "learning_rate": 1.500353359843123e-05, "loss": 2.7457, "step": 9264 }, { "epoch": 0.759702792723546, "grad_norm": 0.35667383670806885, "learning_rate": 1.4984174254206968e-05, "loss": 2.6442, "step": 9266 }, { "epoch": 0.7598667691519344, "grad_norm": 0.3673805594444275, "learning_rate": 1.496482520630394e-05, "loss": 2.7102, "step": 9268 }, { "epoch": 0.7600307455803228, "grad_norm": 0.35707899928092957, "learning_rate": 1.4945486460411707e-05, "loss": 2.6282, "step": 9270 }, { "epoch": 0.7601947220087113, "grad_norm": 0.38144126534461975, "learning_rate": 1.4926158022216801e-05, "loss": 2.6761, "step": 9272 }, { "epoch": 0.7603586984370997, "grad_norm": 0.3500707447528839, "learning_rate": 1.4906839897402659e-05, "loss": 2.6626, "step": 9274 }, { "epoch": 0.7605226748654881, "grad_norm": 0.3673284947872162, "learning_rate": 1.4887532091649787e-05, "loss": 2.6615, "step": 9276 }, { "epoch": 0.7606866512938765, "grad_norm": 0.36343106627464294, "learning_rate": 1.4868234610635556e-05, "loss": 2.6965, "step": 9278 }, { "epoch": 0.760850627722265, "grad_norm": 0.36105042695999146, "learning_rate": 1.4848947460034357e-05, "loss": 2.6959, "step": 9280 }, { "epoch": 0.7610146041506534, "grad_norm": 0.3486408591270447, "learning_rate": 1.482967064551754e-05, "loss": 2.6633, "step": 9282 }, { "epoch": 0.7611785805790418, "grad_norm": 0.3611408770084381, "learning_rate": 1.4810404172753412e-05, "loss": 2.7379, "step": 9284 }, { "epoch": 0.7613425570074301, "grad_norm": 0.3385476768016815, "learning_rate": 1.479114804740721e-05, "loss": 2.6916, "step": 9286 }, { "epoch": 0.7615065334358186, "grad_norm": 0.37254318594932556, "learning_rate": 1.4771902275141164e-05, "loss": 2.6419, "step": 9288 }, { "epoch": 0.761670509864207, "grad_norm": 0.37367677688598633, "learning_rate": 1.475266686161445e-05, "loss": 2.6737, "step": 9290 }, { "epoch": 0.7618344862925954, "grad_norm": 0.3546677529811859, "learning_rate": 1.4733441812483196e-05, "loss": 2.6456, "step": 9292 }, { "epoch": 0.7619984627209838, "grad_norm": 0.40020716190338135, "learning_rate": 1.4714227133400494e-05, "loss": 2.649, "step": 9294 }, { "epoch": 0.7621624391493723, "grad_norm": 0.3578796982765198, "learning_rate": 1.4695022830016353e-05, "loss": 2.6471, "step": 9296 }, { "epoch": 0.7623264155777607, "grad_norm": 0.37742576003074646, "learning_rate": 1.4675828907977762e-05, "loss": 2.6415, "step": 9298 }, { "epoch": 0.7624903920061491, "grad_norm": 0.3490278720855713, "learning_rate": 1.4656645372928652e-05, "loss": 2.6839, "step": 9300 }, { "epoch": 0.7626543684345375, "grad_norm": 0.35995692014694214, "learning_rate": 1.4637472230509908e-05, "loss": 2.7176, "step": 9302 }, { "epoch": 0.762818344862926, "grad_norm": 0.35832446813583374, "learning_rate": 1.4618309486359305e-05, "loss": 2.6853, "step": 9304 }, { "epoch": 0.7629823212913144, "grad_norm": 0.36390066146850586, "learning_rate": 1.4599157146111664e-05, "loss": 2.6647, "step": 9306 }, { "epoch": 0.7631462977197028, "grad_norm": 0.35885295271873474, "learning_rate": 1.4580015215398634e-05, "loss": 2.6791, "step": 9308 }, { "epoch": 0.7633102741480912, "grad_norm": 0.3560067415237427, "learning_rate": 1.4560883699848877e-05, "loss": 2.6952, "step": 9310 }, { "epoch": 0.7634742505764797, "grad_norm": 0.3524385988712311, "learning_rate": 1.4541762605087982e-05, "loss": 2.7093, "step": 9312 }, { "epoch": 0.763638227004868, "grad_norm": 0.34818336367607117, "learning_rate": 1.4522651936738436e-05, "loss": 2.6766, "step": 9314 }, { "epoch": 0.7638022034332564, "grad_norm": 0.3572460114955902, "learning_rate": 1.4503551700419698e-05, "loss": 2.6575, "step": 9316 }, { "epoch": 0.7639661798616448, "grad_norm": 0.3511589765548706, "learning_rate": 1.4484461901748142e-05, "loss": 2.6927, "step": 9318 }, { "epoch": 0.7641301562900333, "grad_norm": 0.35079702734947205, "learning_rate": 1.4465382546337103e-05, "loss": 2.6937, "step": 9320 }, { "epoch": 0.7642941327184217, "grad_norm": 0.35399383306503296, "learning_rate": 1.4446313639796772e-05, "loss": 2.6608, "step": 9322 }, { "epoch": 0.7644581091468101, "grad_norm": 0.35608798265457153, "learning_rate": 1.4427255187734373e-05, "loss": 2.6855, "step": 9324 }, { "epoch": 0.7646220855751986, "grad_norm": 0.3542160093784332, "learning_rate": 1.4408207195753954e-05, "loss": 2.6515, "step": 9326 }, { "epoch": 0.764786062003587, "grad_norm": 0.3493207097053528, "learning_rate": 1.4389169669456554e-05, "loss": 2.7032, "step": 9328 }, { "epoch": 0.7649500384319754, "grad_norm": 0.34453195333480835, "learning_rate": 1.4370142614440107e-05, "loss": 2.7099, "step": 9330 }, { "epoch": 0.7651140148603638, "grad_norm": 0.3854905962944031, "learning_rate": 1.4351126036299495e-05, "loss": 2.7088, "step": 9332 }, { "epoch": 0.7652779912887523, "grad_norm": 0.34645211696624756, "learning_rate": 1.4332119940626459e-05, "loss": 2.6839, "step": 9334 }, { "epoch": 0.7654419677171407, "grad_norm": 0.36502528190612793, "learning_rate": 1.4313124333009715e-05, "loss": 2.7278, "step": 9336 }, { "epoch": 0.7656059441455291, "grad_norm": 0.3685794472694397, "learning_rate": 1.4294139219034896e-05, "loss": 2.6501, "step": 9338 }, { "epoch": 0.7657699205739175, "grad_norm": 0.3831043541431427, "learning_rate": 1.4275164604284485e-05, "loss": 2.7004, "step": 9340 }, { "epoch": 0.765933897002306, "grad_norm": 0.37475207448005676, "learning_rate": 1.4256200494337973e-05, "loss": 2.6842, "step": 9342 }, { "epoch": 0.7660978734306944, "grad_norm": 0.3695252537727356, "learning_rate": 1.4237246894771682e-05, "loss": 2.6256, "step": 9344 }, { "epoch": 0.7662618498590827, "grad_norm": 0.3627135753631592, "learning_rate": 1.4218303811158873e-05, "loss": 2.6542, "step": 9346 }, { "epoch": 0.7664258262874711, "grad_norm": 0.3664674460887909, "learning_rate": 1.4199371249069725e-05, "loss": 2.6771, "step": 9348 }, { "epoch": 0.7665898027158596, "grad_norm": 0.3648863136768341, "learning_rate": 1.4180449214071334e-05, "loss": 2.6593, "step": 9350 }, { "epoch": 0.766753779144248, "grad_norm": 0.3762522041797638, "learning_rate": 1.4161537711727619e-05, "loss": 2.6608, "step": 9352 }, { "epoch": 0.7669177555726364, "grad_norm": 0.37273311614990234, "learning_rate": 1.414263674759953e-05, "loss": 2.6997, "step": 9354 }, { "epoch": 0.7670817320010248, "grad_norm": 0.37854138016700745, "learning_rate": 1.4123746327244809e-05, "loss": 2.6943, "step": 9356 }, { "epoch": 0.7672457084294133, "grad_norm": 0.35645759105682373, "learning_rate": 1.4104866456218152e-05, "loss": 2.6611, "step": 9358 }, { "epoch": 0.7674096848578017, "grad_norm": 0.35881680250167847, "learning_rate": 1.408599714007116e-05, "loss": 2.6256, "step": 9360 }, { "epoch": 0.7675736612861901, "grad_norm": 0.36051514744758606, "learning_rate": 1.4067138384352274e-05, "loss": 2.6746, "step": 9362 }, { "epoch": 0.7677376377145785, "grad_norm": 0.3703957796096802, "learning_rate": 1.4048290194606883e-05, "loss": 2.6093, "step": 9364 }, { "epoch": 0.767901614142967, "grad_norm": 0.3530588150024414, "learning_rate": 1.4029452576377261e-05, "loss": 2.6738, "step": 9366 }, { "epoch": 0.7680655905713554, "grad_norm": 0.3432809114456177, "learning_rate": 1.401062553520257e-05, "loss": 2.6656, "step": 9368 }, { "epoch": 0.7682295669997438, "grad_norm": 0.3378794491291046, "learning_rate": 1.3991809076618818e-05, "loss": 2.7078, "step": 9370 }, { "epoch": 0.7683935434281322, "grad_norm": 0.3686958849430084, "learning_rate": 1.3973003206159001e-05, "loss": 2.7039, "step": 9372 }, { "epoch": 0.7685575198565207, "grad_norm": 0.35902139544487, "learning_rate": 1.3954207929352891e-05, "loss": 2.6791, "step": 9374 }, { "epoch": 0.768721496284909, "grad_norm": 0.36084020137786865, "learning_rate": 1.3935423251727214e-05, "loss": 2.6493, "step": 9376 }, { "epoch": 0.7688854727132974, "grad_norm": 0.34207090735435486, "learning_rate": 1.3916649178805557e-05, "loss": 2.6368, "step": 9378 }, { "epoch": 0.7690494491416859, "grad_norm": 0.3618917763233185, "learning_rate": 1.3897885716108417e-05, "loss": 2.6939, "step": 9380 }, { "epoch": 0.7692134255700743, "grad_norm": 0.3558717966079712, "learning_rate": 1.3879132869153099e-05, "loss": 2.6459, "step": 9382 }, { "epoch": 0.7693774019984627, "grad_norm": 0.3578108847141266, "learning_rate": 1.3860390643453863e-05, "loss": 2.6555, "step": 9384 }, { "epoch": 0.7695413784268511, "grad_norm": 0.37152203917503357, "learning_rate": 1.3841659044521827e-05, "loss": 2.7305, "step": 9386 }, { "epoch": 0.7697053548552396, "grad_norm": 0.36924272775650024, "learning_rate": 1.3822938077864927e-05, "loss": 2.6941, "step": 9388 }, { "epoch": 0.769869331283628, "grad_norm": 0.34775790572166443, "learning_rate": 1.380422774898808e-05, "loss": 2.724, "step": 9390 }, { "epoch": 0.7700333077120164, "grad_norm": 0.3604590892791748, "learning_rate": 1.3785528063392966e-05, "loss": 2.675, "step": 9392 }, { "epoch": 0.7701972841404048, "grad_norm": 0.3558301329612732, "learning_rate": 1.3766839026578205e-05, "loss": 2.6631, "step": 9394 }, { "epoch": 0.7703612605687933, "grad_norm": 0.35097116231918335, "learning_rate": 1.3748160644039266e-05, "loss": 2.6204, "step": 9396 }, { "epoch": 0.7705252369971817, "grad_norm": 0.3585958182811737, "learning_rate": 1.3729492921268488e-05, "loss": 2.6488, "step": 9398 }, { "epoch": 0.7706892134255701, "grad_norm": 0.35323208570480347, "learning_rate": 1.371083586375505e-05, "loss": 2.6851, "step": 9400 }, { "epoch": 0.7708531898539585, "grad_norm": 0.3569926917552948, "learning_rate": 1.3692189476985024e-05, "loss": 2.7374, "step": 9402 }, { "epoch": 0.771017166282347, "grad_norm": 0.3592425584793091, "learning_rate": 1.3673553766441344e-05, "loss": 2.651, "step": 9404 }, { "epoch": 0.7711811427107353, "grad_norm": 0.36438649892807007, "learning_rate": 1.3654928737603789e-05, "loss": 2.7174, "step": 9406 }, { "epoch": 0.7713451191391237, "grad_norm": 0.36406847834587097, "learning_rate": 1.363631439594903e-05, "loss": 2.7004, "step": 9408 }, { "epoch": 0.7715090955675121, "grad_norm": 0.34943458437919617, "learning_rate": 1.361771074695053e-05, "loss": 2.6884, "step": 9410 }, { "epoch": 0.7716730719959006, "grad_norm": 0.3793748915195465, "learning_rate": 1.359911779607867e-05, "loss": 2.6685, "step": 9412 }, { "epoch": 0.771837048424289, "grad_norm": 0.3477378785610199, "learning_rate": 1.358053554880066e-05, "loss": 2.6585, "step": 9414 }, { "epoch": 0.7720010248526774, "grad_norm": 0.35700762271881104, "learning_rate": 1.356196401058058e-05, "loss": 2.6585, "step": 9416 }, { "epoch": 0.7721650012810658, "grad_norm": 0.3929234743118286, "learning_rate": 1.3543403186879305e-05, "loss": 2.6646, "step": 9418 }, { "epoch": 0.7723289777094543, "grad_norm": 0.3680419325828552, "learning_rate": 1.3524853083154666e-05, "loss": 2.7358, "step": 9420 }, { "epoch": 0.7724929541378427, "grad_norm": 0.35374951362609863, "learning_rate": 1.3506313704861217e-05, "loss": 2.6955, "step": 9422 }, { "epoch": 0.7726569305662311, "grad_norm": 0.3629777133464813, "learning_rate": 1.3487785057450452e-05, "loss": 2.6898, "step": 9424 }, { "epoch": 0.7728209069946195, "grad_norm": 0.3722657561302185, "learning_rate": 1.3469267146370684e-05, "loss": 2.6859, "step": 9426 }, { "epoch": 0.772984883423008, "grad_norm": 0.3497594892978668, "learning_rate": 1.3450759977067024e-05, "loss": 2.7225, "step": 9428 }, { "epoch": 0.7731488598513964, "grad_norm": 0.36085259914398193, "learning_rate": 1.3432263554981488e-05, "loss": 2.6584, "step": 9430 }, { "epoch": 0.7733128362797848, "grad_norm": 0.3492680490016937, "learning_rate": 1.3413777885552898e-05, "loss": 2.6604, "step": 9432 }, { "epoch": 0.7734768127081731, "grad_norm": 0.3547397255897522, "learning_rate": 1.3395302974216944e-05, "loss": 2.6685, "step": 9434 }, { "epoch": 0.7736407891365616, "grad_norm": 0.34876057505607605, "learning_rate": 1.3376838826406075e-05, "loss": 2.7225, "step": 9436 }, { "epoch": 0.77380476556495, "grad_norm": 0.38091421127319336, "learning_rate": 1.3358385447549698e-05, "loss": 2.7215, "step": 9438 }, { "epoch": 0.7739687419933384, "grad_norm": 0.3698398768901825, "learning_rate": 1.3339942843073938e-05, "loss": 2.6935, "step": 9440 }, { "epoch": 0.7741327184217269, "grad_norm": 0.39932647347450256, "learning_rate": 1.332151101840181e-05, "loss": 2.6807, "step": 9442 }, { "epoch": 0.7742966948501153, "grad_norm": 0.35598817467689514, "learning_rate": 1.330308997895316e-05, "loss": 2.6885, "step": 9444 }, { "epoch": 0.7744606712785037, "grad_norm": 0.36231109499931335, "learning_rate": 1.3284679730144661e-05, "loss": 2.7099, "step": 9446 }, { "epoch": 0.7746246477068921, "grad_norm": 0.37510383129119873, "learning_rate": 1.3266280277389764e-05, "loss": 2.6931, "step": 9448 }, { "epoch": 0.7747886241352806, "grad_norm": 0.3591841161251068, "learning_rate": 1.3247891626098819e-05, "loss": 2.7226, "step": 9450 }, { "epoch": 0.774952600563669, "grad_norm": 0.36483752727508545, "learning_rate": 1.322951378167896e-05, "loss": 2.6731, "step": 9452 }, { "epoch": 0.7751165769920574, "grad_norm": 0.36224988102912903, "learning_rate": 1.3211146749534121e-05, "loss": 2.7352, "step": 9454 }, { "epoch": 0.7752805534204458, "grad_norm": 0.3532394766807556, "learning_rate": 1.319279053506513e-05, "loss": 2.7006, "step": 9456 }, { "epoch": 0.7754445298488343, "grad_norm": 0.3436356484889984, "learning_rate": 1.3174445143669556e-05, "loss": 2.5649, "step": 9458 }, { "epoch": 0.7756085062772227, "grad_norm": 0.36256667971611023, "learning_rate": 1.3156110580741826e-05, "loss": 2.6176, "step": 9460 }, { "epoch": 0.775772482705611, "grad_norm": 0.3617798388004303, "learning_rate": 1.3137786851673178e-05, "loss": 2.6551, "step": 9462 }, { "epoch": 0.7759364591339994, "grad_norm": 0.3464323878288269, "learning_rate": 1.311947396185168e-05, "loss": 2.6968, "step": 9464 }, { "epoch": 0.776100435562388, "grad_norm": 0.35490676760673523, "learning_rate": 1.3101171916662142e-05, "loss": 2.6609, "step": 9466 }, { "epoch": 0.7762644119907763, "grad_norm": 0.3500146269798279, "learning_rate": 1.3082880721486301e-05, "loss": 2.6814, "step": 9468 }, { "epoch": 0.7764283884191647, "grad_norm": 0.3556649088859558, "learning_rate": 1.306460038170259e-05, "loss": 2.5957, "step": 9470 }, { "epoch": 0.7765923648475531, "grad_norm": 0.3421004116535187, "learning_rate": 1.3046330902686327e-05, "loss": 2.6814, "step": 9472 }, { "epoch": 0.7767563412759416, "grad_norm": 0.3476956784725189, "learning_rate": 1.3028072289809612e-05, "loss": 2.6997, "step": 9474 }, { "epoch": 0.77692031770433, "grad_norm": 0.3426036238670349, "learning_rate": 1.3009824548441319e-05, "loss": 2.6605, "step": 9476 }, { "epoch": 0.7770842941327184, "grad_norm": 0.3616240918636322, "learning_rate": 1.2991587683947176e-05, "loss": 2.6703, "step": 9478 }, { "epoch": 0.7772482705611068, "grad_norm": 0.33885133266448975, "learning_rate": 1.2973361701689685e-05, "loss": 2.6317, "step": 9480 }, { "epoch": 0.7774122469894953, "grad_norm": 0.3734496235847473, "learning_rate": 1.2955146607028167e-05, "loss": 2.7071, "step": 9482 }, { "epoch": 0.7775762234178837, "grad_norm": 0.35345563292503357, "learning_rate": 1.2936942405318685e-05, "loss": 2.6768, "step": 9484 }, { "epoch": 0.7777401998462721, "grad_norm": 0.36098942160606384, "learning_rate": 1.29187491019142e-05, "loss": 2.7301, "step": 9486 }, { "epoch": 0.7779041762746605, "grad_norm": 0.36525386571884155, "learning_rate": 1.2900566702164374e-05, "loss": 2.6943, "step": 9488 }, { "epoch": 0.778068152703049, "grad_norm": 0.362845778465271, "learning_rate": 1.2882395211415699e-05, "loss": 2.6518, "step": 9490 }, { "epoch": 0.7782321291314374, "grad_norm": 0.33044666051864624, "learning_rate": 1.2864234635011469e-05, "loss": 2.6383, "step": 9492 }, { "epoch": 0.7783961055598257, "grad_norm": 0.35040366649627686, "learning_rate": 1.2846084978291766e-05, "loss": 2.711, "step": 9494 }, { "epoch": 0.7785600819882142, "grad_norm": 0.35295918583869934, "learning_rate": 1.2827946246593436e-05, "loss": 2.6187, "step": 9496 }, { "epoch": 0.7787240584166026, "grad_norm": 0.3368498384952545, "learning_rate": 1.2809818445250138e-05, "loss": 2.6681, "step": 9498 }, { "epoch": 0.778888034844991, "grad_norm": 0.34383425116539, "learning_rate": 1.2791701579592319e-05, "loss": 2.7002, "step": 9500 }, { "epoch": 0.7790520112733794, "grad_norm": 0.3518548309803009, "learning_rate": 1.2773595654947168e-05, "loss": 2.7066, "step": 9502 }, { "epoch": 0.7792159877017679, "grad_norm": 0.3643976151943207, "learning_rate": 1.2755500676638743e-05, "loss": 2.6993, "step": 9504 }, { "epoch": 0.7793799641301563, "grad_norm": 0.36024677753448486, "learning_rate": 1.2737416649987783e-05, "loss": 2.6891, "step": 9506 }, { "epoch": 0.7795439405585447, "grad_norm": 0.36152777075767517, "learning_rate": 1.271934358031187e-05, "loss": 2.6596, "step": 9508 }, { "epoch": 0.7797079169869331, "grad_norm": 0.3847897946834564, "learning_rate": 1.2701281472925347e-05, "loss": 2.6882, "step": 9510 }, { "epoch": 0.7798718934153216, "grad_norm": 0.3748205304145813, "learning_rate": 1.268323033313935e-05, "loss": 2.647, "step": 9512 }, { "epoch": 0.78003586984371, "grad_norm": 0.3616335093975067, "learning_rate": 1.2665190166261747e-05, "loss": 2.673, "step": 9514 }, { "epoch": 0.7801998462720984, "grad_norm": 0.35121390223503113, "learning_rate": 1.2647160977597222e-05, "loss": 2.667, "step": 9516 }, { "epoch": 0.7803638227004868, "grad_norm": 0.35246071219444275, "learning_rate": 1.2629142772447211e-05, "loss": 2.659, "step": 9518 }, { "epoch": 0.7805277991288753, "grad_norm": 0.37305203080177307, "learning_rate": 1.2611135556109926e-05, "loss": 2.7144, "step": 9520 }, { "epoch": 0.7806917755572637, "grad_norm": 0.3591538667678833, "learning_rate": 1.259313933388036e-05, "loss": 2.6792, "step": 9522 }, { "epoch": 0.780855751985652, "grad_norm": 0.3579569160938263, "learning_rate": 1.2575154111050236e-05, "loss": 2.7261, "step": 9524 }, { "epoch": 0.7810197284140404, "grad_norm": 0.34520769119262695, "learning_rate": 1.2557179892908077e-05, "loss": 2.6151, "step": 9526 }, { "epoch": 0.7811837048424289, "grad_norm": 0.3550892770290375, "learning_rate": 1.2539216684739158e-05, "loss": 2.7154, "step": 9528 }, { "epoch": 0.7813476812708173, "grad_norm": 0.33964020013809204, "learning_rate": 1.252126449182554e-05, "loss": 2.7501, "step": 9530 }, { "epoch": 0.7815116576992057, "grad_norm": 0.35189035534858704, "learning_rate": 1.250332331944597e-05, "loss": 2.7275, "step": 9532 }, { "epoch": 0.7816756341275941, "grad_norm": 0.3353702425956726, "learning_rate": 1.2485393172876075e-05, "loss": 2.6851, "step": 9534 }, { "epoch": 0.7818396105559826, "grad_norm": 0.36759746074676514, "learning_rate": 1.2467474057388118e-05, "loss": 2.6827, "step": 9536 }, { "epoch": 0.782003586984371, "grad_norm": 0.35115504264831543, "learning_rate": 1.2449565978251198e-05, "loss": 2.7098, "step": 9538 }, { "epoch": 0.7821675634127594, "grad_norm": 0.3480149805545807, "learning_rate": 1.243166894073114e-05, "loss": 2.6775, "step": 9540 }, { "epoch": 0.7823315398411478, "grad_norm": 0.3457199037075043, "learning_rate": 1.2413782950090535e-05, "loss": 2.6554, "step": 9542 }, { "epoch": 0.7824955162695363, "grad_norm": 0.34468725323677063, "learning_rate": 1.2395908011588691e-05, "loss": 2.6891, "step": 9544 }, { "epoch": 0.7826594926979247, "grad_norm": 0.3378111720085144, "learning_rate": 1.2378044130481713e-05, "loss": 2.6529, "step": 9546 }, { "epoch": 0.7828234691263131, "grad_norm": 0.3310491144657135, "learning_rate": 1.236019131202244e-05, "loss": 2.6336, "step": 9548 }, { "epoch": 0.7829874455547016, "grad_norm": 0.34897878766059875, "learning_rate": 1.2342349561460403e-05, "loss": 2.664, "step": 9550 }, { "epoch": 0.78315142198309, "grad_norm": 0.3302777409553528, "learning_rate": 1.2324518884042002e-05, "loss": 2.6708, "step": 9552 }, { "epoch": 0.7833153984114783, "grad_norm": 0.3559781312942505, "learning_rate": 1.2306699285010243e-05, "loss": 2.699, "step": 9554 }, { "epoch": 0.7834793748398667, "grad_norm": 0.3423970639705658, "learning_rate": 1.2288890769604965e-05, "loss": 2.6859, "step": 9556 }, { "epoch": 0.7836433512682552, "grad_norm": 0.3486066460609436, "learning_rate": 1.2271093343062712e-05, "loss": 2.6774, "step": 9558 }, { "epoch": 0.7838073276966436, "grad_norm": 0.35928991436958313, "learning_rate": 1.2253307010616798e-05, "loss": 2.6821, "step": 9560 }, { "epoch": 0.783971304125032, "grad_norm": 0.35299113392829895, "learning_rate": 1.2235531777497217e-05, "loss": 2.6238, "step": 9562 }, { "epoch": 0.7841352805534204, "grad_norm": 0.34155333042144775, "learning_rate": 1.221776764893075e-05, "loss": 2.6844, "step": 9564 }, { "epoch": 0.7842992569818089, "grad_norm": 0.3751930892467499, "learning_rate": 1.22000146301409e-05, "loss": 2.7065, "step": 9566 }, { "epoch": 0.7844632334101973, "grad_norm": 0.34455281496047974, "learning_rate": 1.2182272726347893e-05, "loss": 2.623, "step": 9568 }, { "epoch": 0.7846272098385857, "grad_norm": 0.35793575644493103, "learning_rate": 1.2164541942768715e-05, "loss": 2.7401, "step": 9570 }, { "epoch": 0.7847911862669741, "grad_norm": 0.3616796135902405, "learning_rate": 1.2146822284617038e-05, "loss": 2.7057, "step": 9572 }, { "epoch": 0.7849551626953626, "grad_norm": 0.3481523394584656, "learning_rate": 1.212911375710329e-05, "loss": 2.6234, "step": 9574 }, { "epoch": 0.785119139123751, "grad_norm": 0.36365076899528503, "learning_rate": 1.2111416365434619e-05, "loss": 2.6293, "step": 9576 }, { "epoch": 0.7852831155521394, "grad_norm": 0.3402523696422577, "learning_rate": 1.2093730114814933e-05, "loss": 2.6603, "step": 9578 }, { "epoch": 0.7854470919805278, "grad_norm": 0.3487389087677002, "learning_rate": 1.207605501044477e-05, "loss": 2.6606, "step": 9580 }, { "epoch": 0.7856110684089163, "grad_norm": 0.33280879259109497, "learning_rate": 1.2058391057521523e-05, "loss": 2.6621, "step": 9582 }, { "epoch": 0.7857750448373046, "grad_norm": 0.34225955605506897, "learning_rate": 1.2040738261239188e-05, "loss": 2.6114, "step": 9584 }, { "epoch": 0.785939021265693, "grad_norm": 0.3415073752403259, "learning_rate": 1.2023096626788544e-05, "loss": 2.6652, "step": 9586 }, { "epoch": 0.7861029976940814, "grad_norm": 0.362202912569046, "learning_rate": 1.2005466159357081e-05, "loss": 2.6809, "step": 9588 }, { "epoch": 0.7862669741224699, "grad_norm": 0.3522995114326477, "learning_rate": 1.1987846864128977e-05, "loss": 2.7043, "step": 9590 }, { "epoch": 0.7864309505508583, "grad_norm": 0.3639811873435974, "learning_rate": 1.1970238746285156e-05, "loss": 2.7075, "step": 9592 }, { "epoch": 0.7865949269792467, "grad_norm": 0.3454490602016449, "learning_rate": 1.195264181100324e-05, "loss": 2.6977, "step": 9594 }, { "epoch": 0.7867589034076351, "grad_norm": 0.3564718961715698, "learning_rate": 1.1935056063457584e-05, "loss": 2.6458, "step": 9596 }, { "epoch": 0.7869228798360236, "grad_norm": 0.3528510630130768, "learning_rate": 1.1917481508819189e-05, "loss": 2.6667, "step": 9598 }, { "epoch": 0.787086856264412, "grad_norm": 0.35008668899536133, "learning_rate": 1.1899918152255873e-05, "loss": 2.7152, "step": 9600 }, { "epoch": 0.7872508326928004, "grad_norm": 0.3496108055114746, "learning_rate": 1.1882365998932054e-05, "loss": 2.7084, "step": 9602 }, { "epoch": 0.7874148091211888, "grad_norm": 0.3711487948894501, "learning_rate": 1.1864825054008915e-05, "loss": 2.7183, "step": 9604 }, { "epoch": 0.7875787855495773, "grad_norm": 0.3646942377090454, "learning_rate": 1.184729532264433e-05, "loss": 2.6834, "step": 9606 }, { "epoch": 0.7877427619779657, "grad_norm": 0.361887663602829, "learning_rate": 1.1829776809992899e-05, "loss": 2.6582, "step": 9608 }, { "epoch": 0.7879067384063541, "grad_norm": 0.35898691415786743, "learning_rate": 1.1812269521205855e-05, "loss": 2.7385, "step": 9610 }, { "epoch": 0.7880707148347426, "grad_norm": 0.36085835099220276, "learning_rate": 1.1794773461431202e-05, "loss": 2.71, "step": 9612 }, { "epoch": 0.788234691263131, "grad_norm": 0.35055938363075256, "learning_rate": 1.1777288635813622e-05, "loss": 2.6739, "step": 9614 }, { "epoch": 0.7883986676915193, "grad_norm": 0.35270988941192627, "learning_rate": 1.1759815049494449e-05, "loss": 2.6612, "step": 9616 }, { "epoch": 0.7885626441199077, "grad_norm": 0.35176971554756165, "learning_rate": 1.1742352707611804e-05, "loss": 2.6839, "step": 9618 }, { "epoch": 0.7887266205482962, "grad_norm": 0.35264796018600464, "learning_rate": 1.1724901615300404e-05, "loss": 2.7064, "step": 9620 }, { "epoch": 0.7888905969766846, "grad_norm": 0.3563726544380188, "learning_rate": 1.170746177769172e-05, "loss": 2.7037, "step": 9622 }, { "epoch": 0.789054573405073, "grad_norm": 0.35047483444213867, "learning_rate": 1.169003319991389e-05, "loss": 2.6715, "step": 9624 }, { "epoch": 0.7892185498334614, "grad_norm": 0.35892969369888306, "learning_rate": 1.167261588709177e-05, "loss": 2.6376, "step": 9626 }, { "epoch": 0.7893825262618499, "grad_norm": 0.3563160002231598, "learning_rate": 1.1655209844346826e-05, "loss": 2.705, "step": 9628 }, { "epoch": 0.7895465026902383, "grad_norm": 0.34378495812416077, "learning_rate": 1.1637815076797326e-05, "loss": 2.6815, "step": 9630 }, { "epoch": 0.7897104791186267, "grad_norm": 0.3731745183467865, "learning_rate": 1.1620431589558118e-05, "loss": 2.6971, "step": 9632 }, { "epoch": 0.7898744555470151, "grad_norm": 0.3554565906524658, "learning_rate": 1.1603059387740784e-05, "loss": 2.7061, "step": 9634 }, { "epoch": 0.7900384319754036, "grad_norm": 0.3700547516345978, "learning_rate": 1.1585698476453605e-05, "loss": 2.6747, "step": 9636 }, { "epoch": 0.790202408403792, "grad_norm": 0.36284008622169495, "learning_rate": 1.1568348860801475e-05, "loss": 2.695, "step": 9638 }, { "epoch": 0.7903663848321804, "grad_norm": 0.3478492498397827, "learning_rate": 1.1551010545886037e-05, "loss": 2.6049, "step": 9640 }, { "epoch": 0.7905303612605687, "grad_norm": 0.3696359097957611, "learning_rate": 1.1533683536805568e-05, "loss": 2.6643, "step": 9642 }, { "epoch": 0.7906943376889572, "grad_norm": 0.35695111751556396, "learning_rate": 1.1516367838655051e-05, "loss": 2.6518, "step": 9644 }, { "epoch": 0.7908583141173456, "grad_norm": 0.370159775018692, "learning_rate": 1.1499063456526088e-05, "loss": 2.6883, "step": 9646 }, { "epoch": 0.791022290545734, "grad_norm": 0.3494625985622406, "learning_rate": 1.1481770395507046e-05, "loss": 2.702, "step": 9648 }, { "epoch": 0.7911862669741224, "grad_norm": 0.3910544216632843, "learning_rate": 1.146448866068287e-05, "loss": 2.6482, "step": 9650 }, { "epoch": 0.7913502434025109, "grad_norm": 0.35285142064094543, "learning_rate": 1.1447218257135222e-05, "loss": 2.6853, "step": 9652 }, { "epoch": 0.7915142198308993, "grad_norm": 0.3534504771232605, "learning_rate": 1.1429959189942429e-05, "loss": 2.659, "step": 9654 }, { "epoch": 0.7916781962592877, "grad_norm": 0.34430253505706787, "learning_rate": 1.1412711464179488e-05, "loss": 2.6492, "step": 9656 }, { "epoch": 0.7918421726876761, "grad_norm": 0.350552499294281, "learning_rate": 1.1395475084918027e-05, "loss": 2.6472, "step": 9658 }, { "epoch": 0.7920061491160646, "grad_norm": 0.3621987998485565, "learning_rate": 1.137825005722638e-05, "loss": 2.6847, "step": 9660 }, { "epoch": 0.792170125544453, "grad_norm": 0.35456162691116333, "learning_rate": 1.1361036386169533e-05, "loss": 2.6756, "step": 9662 }, { "epoch": 0.7923341019728414, "grad_norm": 0.35445648431777954, "learning_rate": 1.1343834076809084e-05, "loss": 2.6655, "step": 9664 }, { "epoch": 0.7924980784012299, "grad_norm": 0.3517984449863434, "learning_rate": 1.1326643134203396e-05, "loss": 2.6912, "step": 9666 }, { "epoch": 0.7926620548296183, "grad_norm": 0.34945622086524963, "learning_rate": 1.130946356340738e-05, "loss": 2.7292, "step": 9668 }, { "epoch": 0.7928260312580067, "grad_norm": 0.344644695520401, "learning_rate": 1.1292295369472654e-05, "loss": 2.7074, "step": 9670 }, { "epoch": 0.792990007686395, "grad_norm": 0.35177162289619446, "learning_rate": 1.1275138557447496e-05, "loss": 2.6542, "step": 9672 }, { "epoch": 0.7931539841147836, "grad_norm": 0.35163941979408264, "learning_rate": 1.1257993132376832e-05, "loss": 2.7232, "step": 9674 }, { "epoch": 0.7933179605431719, "grad_norm": 0.3408806622028351, "learning_rate": 1.124085909930221e-05, "loss": 2.7109, "step": 9676 }, { "epoch": 0.7934819369715603, "grad_norm": 0.3501705229282379, "learning_rate": 1.122373646326187e-05, "loss": 2.6242, "step": 9678 }, { "epoch": 0.7936459133999487, "grad_norm": 0.32166677713394165, "learning_rate": 1.1206625229290674e-05, "loss": 2.6747, "step": 9680 }, { "epoch": 0.7938098898283372, "grad_norm": 0.3594343364238739, "learning_rate": 1.1189525402420143e-05, "loss": 2.7178, "step": 9682 }, { "epoch": 0.7939738662567256, "grad_norm": 0.33665400743484497, "learning_rate": 1.1172436987678464e-05, "loss": 2.6696, "step": 9684 }, { "epoch": 0.794137842685114, "grad_norm": 0.3630342185497284, "learning_rate": 1.1155359990090391e-05, "loss": 2.6589, "step": 9686 }, { "epoch": 0.7943018191135024, "grad_norm": 0.3592086732387543, "learning_rate": 1.1138294414677436e-05, "loss": 2.7248, "step": 9688 }, { "epoch": 0.7944657955418909, "grad_norm": 0.370037704706192, "learning_rate": 1.112124026645765e-05, "loss": 2.7006, "step": 9690 }, { "epoch": 0.7946297719702793, "grad_norm": 0.35149356722831726, "learning_rate": 1.1104197550445777e-05, "loss": 2.6263, "step": 9692 }, { "epoch": 0.7947937483986677, "grad_norm": 0.36438047885894775, "learning_rate": 1.1087166271653188e-05, "loss": 2.6619, "step": 9694 }, { "epoch": 0.7949577248270561, "grad_norm": 0.35217371582984924, "learning_rate": 1.1070146435087908e-05, "loss": 2.6885, "step": 9696 }, { "epoch": 0.7951217012554446, "grad_norm": 0.3550349473953247, "learning_rate": 1.1053138045754546e-05, "loss": 2.6777, "step": 9698 }, { "epoch": 0.795285677683833, "grad_norm": 0.37742725014686584, "learning_rate": 1.1036141108654401e-05, "loss": 2.6227, "step": 9700 }, { "epoch": 0.7954496541122213, "grad_norm": 0.3512582778930664, "learning_rate": 1.1019155628785387e-05, "loss": 2.7196, "step": 9702 }, { "epoch": 0.7956136305406097, "grad_norm": 0.34685733914375305, "learning_rate": 1.1002181611142016e-05, "loss": 2.6882, "step": 9704 }, { "epoch": 0.7957776069689982, "grad_norm": 0.3615514636039734, "learning_rate": 1.0985219060715496e-05, "loss": 2.7012, "step": 9706 }, { "epoch": 0.7959415833973866, "grad_norm": 0.3530972898006439, "learning_rate": 1.0968267982493596e-05, "loss": 2.6649, "step": 9708 }, { "epoch": 0.796105559825775, "grad_norm": 0.3470955491065979, "learning_rate": 1.0951328381460745e-05, "loss": 2.6474, "step": 9710 }, { "epoch": 0.7962695362541634, "grad_norm": 0.3472953140735626, "learning_rate": 1.0934400262598005e-05, "loss": 2.6463, "step": 9712 }, { "epoch": 0.7964335126825519, "grad_norm": 0.3501282036304474, "learning_rate": 1.0917483630883058e-05, "loss": 2.6794, "step": 9714 }, { "epoch": 0.7965974891109403, "grad_norm": 0.35167282819747925, "learning_rate": 1.0900578491290153e-05, "loss": 2.6936, "step": 9716 }, { "epoch": 0.7967614655393287, "grad_norm": 0.3607572615146637, "learning_rate": 1.0883684848790271e-05, "loss": 2.7002, "step": 9718 }, { "epoch": 0.7969254419677171, "grad_norm": 0.3574501872062683, "learning_rate": 1.0866802708350899e-05, "loss": 2.6834, "step": 9720 }, { "epoch": 0.7970894183961056, "grad_norm": 0.3416605293750763, "learning_rate": 1.0849932074936204e-05, "loss": 2.668, "step": 9722 }, { "epoch": 0.797253394824494, "grad_norm": 0.3505396842956543, "learning_rate": 1.0833072953506979e-05, "loss": 2.6654, "step": 9724 }, { "epoch": 0.7974173712528824, "grad_norm": 0.3599120080471039, "learning_rate": 1.081622534902057e-05, "loss": 2.7004, "step": 9726 }, { "epoch": 0.7975813476812709, "grad_norm": 0.3394889235496521, "learning_rate": 1.0799389266430998e-05, "loss": 2.6832, "step": 9728 }, { "epoch": 0.7977453241096593, "grad_norm": 0.3506944179534912, "learning_rate": 1.078256471068887e-05, "loss": 2.6977, "step": 9730 }, { "epoch": 0.7979093005380476, "grad_norm": 0.3479519784450531, "learning_rate": 1.076575168674142e-05, "loss": 2.686, "step": 9732 }, { "epoch": 0.798073276966436, "grad_norm": 0.3654318153858185, "learning_rate": 1.0748950199532443e-05, "loss": 2.7307, "step": 9734 }, { "epoch": 0.7982372533948245, "grad_norm": 0.3649800419807434, "learning_rate": 1.0732160254002422e-05, "loss": 2.7273, "step": 9736 }, { "epoch": 0.7984012298232129, "grad_norm": 0.3571475148200989, "learning_rate": 1.0715381855088368e-05, "loss": 2.6691, "step": 9738 }, { "epoch": 0.7985652062516013, "grad_norm": 0.3482070565223694, "learning_rate": 1.0698615007723938e-05, "loss": 2.7112, "step": 9740 }, { "epoch": 0.7987291826799897, "grad_norm": 0.3333486020565033, "learning_rate": 1.0681859716839387e-05, "loss": 2.6768, "step": 9742 }, { "epoch": 0.7988931591083782, "grad_norm": 0.35339200496673584, "learning_rate": 1.0665115987361585e-05, "loss": 2.6573, "step": 9744 }, { "epoch": 0.7990571355367666, "grad_norm": 0.3474152386188507, "learning_rate": 1.0648383824213947e-05, "loss": 2.7166, "step": 9746 }, { "epoch": 0.799221111965155, "grad_norm": 0.34623461961746216, "learning_rate": 1.0631663232316557e-05, "loss": 2.6694, "step": 9748 }, { "epoch": 0.7993850883935434, "grad_norm": 0.35611262917518616, "learning_rate": 1.061495421658607e-05, "loss": 2.6667, "step": 9750 }, { "epoch": 0.7995490648219319, "grad_norm": 0.34025830030441284, "learning_rate": 1.059825678193569e-05, "loss": 2.7112, "step": 9752 }, { "epoch": 0.7997130412503203, "grad_norm": 0.3424406349658966, "learning_rate": 1.0581570933275314e-05, "loss": 2.6794, "step": 9754 }, { "epoch": 0.7998770176787087, "grad_norm": 0.34959760308265686, "learning_rate": 1.0564896675511344e-05, "loss": 2.6746, "step": 9756 }, { "epoch": 0.8000409941070971, "grad_norm": 0.335534006357193, "learning_rate": 1.054823401354681e-05, "loss": 2.6784, "step": 9758 }, { "epoch": 0.8002049705354856, "grad_norm": 0.33316054940223694, "learning_rate": 1.0531582952281333e-05, "loss": 2.6212, "step": 9760 }, { "epoch": 0.800368946963874, "grad_norm": 0.353601336479187, "learning_rate": 1.0514943496611128e-05, "loss": 2.6618, "step": 9762 }, { "epoch": 0.8005329233922623, "grad_norm": 0.34828969836235046, "learning_rate": 1.0498315651428969e-05, "loss": 2.6689, "step": 9764 }, { "epoch": 0.8006968998206507, "grad_norm": 0.3577967882156372, "learning_rate": 1.0481699421624242e-05, "loss": 2.6315, "step": 9766 }, { "epoch": 0.8008608762490392, "grad_norm": 0.35291171073913574, "learning_rate": 1.046509481208291e-05, "loss": 2.6765, "step": 9768 }, { "epoch": 0.8010248526774276, "grad_norm": 0.34977084398269653, "learning_rate": 1.0448501827687517e-05, "loss": 2.6954, "step": 9770 }, { "epoch": 0.801188829105816, "grad_norm": 0.36282119154930115, "learning_rate": 1.0431920473317214e-05, "loss": 2.6605, "step": 9772 }, { "epoch": 0.8013528055342044, "grad_norm": 0.35283738374710083, "learning_rate": 1.0415350753847675e-05, "loss": 2.7035, "step": 9774 }, { "epoch": 0.8015167819625929, "grad_norm": 0.3526639938354492, "learning_rate": 1.0398792674151203e-05, "loss": 2.6522, "step": 9776 }, { "epoch": 0.8016807583909813, "grad_norm": 0.35673341155052185, "learning_rate": 1.0382246239096665e-05, "loss": 2.6401, "step": 9778 }, { "epoch": 0.8018447348193697, "grad_norm": 0.3352622389793396, "learning_rate": 1.0365711453549504e-05, "loss": 2.6033, "step": 9780 }, { "epoch": 0.8020087112477582, "grad_norm": 0.33179885149002075, "learning_rate": 1.0349188322371705e-05, "loss": 2.6461, "step": 9782 }, { "epoch": 0.8021726876761466, "grad_norm": 0.3531162440776825, "learning_rate": 1.0332676850421914e-05, "loss": 2.6925, "step": 9784 }, { "epoch": 0.802336664104535, "grad_norm": 0.3456948399543762, "learning_rate": 1.031617704255523e-05, "loss": 2.7072, "step": 9786 }, { "epoch": 0.8025006405329234, "grad_norm": 0.3450542092323303, "learning_rate": 1.0299688903623416e-05, "loss": 2.6471, "step": 9788 }, { "epoch": 0.8026646169613119, "grad_norm": 0.3578134775161743, "learning_rate": 1.0283212438474782e-05, "loss": 2.6696, "step": 9790 }, { "epoch": 0.8028285933897003, "grad_norm": 0.3489846885204315, "learning_rate": 1.0266747651954156e-05, "loss": 2.6904, "step": 9792 }, { "epoch": 0.8029925698180886, "grad_norm": 0.354544460773468, "learning_rate": 1.0250294548902989e-05, "loss": 2.6803, "step": 9794 }, { "epoch": 0.803156546246477, "grad_norm": 0.3470914959907532, "learning_rate": 1.0233853134159277e-05, "loss": 2.6734, "step": 9796 }, { "epoch": 0.8033205226748655, "grad_norm": 0.352478563785553, "learning_rate": 1.0217423412557591e-05, "loss": 2.7052, "step": 9798 }, { "epoch": 0.8034844991032539, "grad_norm": 0.3438357710838318, "learning_rate": 1.020100538892902e-05, "loss": 2.674, "step": 9800 }, { "epoch": 0.8036484755316423, "grad_norm": 0.35036608576774597, "learning_rate": 1.0184599068101291e-05, "loss": 2.6085, "step": 9802 }, { "epoch": 0.8038124519600307, "grad_norm": 0.3414437472820282, "learning_rate": 1.0168204454898605e-05, "loss": 2.7029, "step": 9804 }, { "epoch": 0.8039764283884192, "grad_norm": 0.34041205048561096, "learning_rate": 1.0151821554141772e-05, "loss": 2.6796, "step": 9806 }, { "epoch": 0.8041404048168076, "grad_norm": 0.33361494541168213, "learning_rate": 1.0135450370648148e-05, "loss": 2.7028, "step": 9808 }, { "epoch": 0.804304381245196, "grad_norm": 0.33953338861465454, "learning_rate": 1.0119090909231654e-05, "loss": 2.689, "step": 9810 }, { "epoch": 0.8044683576735844, "grad_norm": 0.34792453050613403, "learning_rate": 1.0102743174702722e-05, "loss": 2.6475, "step": 9812 }, { "epoch": 0.8046323341019729, "grad_norm": 0.3441803455352783, "learning_rate": 1.0086407171868384e-05, "loss": 2.645, "step": 9814 }, { "epoch": 0.8047963105303613, "grad_norm": 0.3474769592285156, "learning_rate": 1.0070082905532196e-05, "loss": 2.7266, "step": 9816 }, { "epoch": 0.8049602869587497, "grad_norm": 0.3321525752544403, "learning_rate": 1.0053770380494276e-05, "loss": 2.7016, "step": 9818 }, { "epoch": 0.805124263387138, "grad_norm": 0.3411124646663666, "learning_rate": 1.0037469601551292e-05, "loss": 2.6623, "step": 9820 }, { "epoch": 0.8052882398155266, "grad_norm": 0.344778448343277, "learning_rate": 1.0021180573496425e-05, "loss": 2.6569, "step": 9822 }, { "epoch": 0.8054522162439149, "grad_norm": 0.35948947072029114, "learning_rate": 1.0004903301119445e-05, "loss": 2.6838, "step": 9824 }, { "epoch": 0.8056161926723033, "grad_norm": 0.3361837863922119, "learning_rate": 9.988637789206634e-06, "loss": 2.647, "step": 9826 }, { "epoch": 0.8057801691006917, "grad_norm": 0.3391808569431305, "learning_rate": 9.97238404254085e-06, "loss": 2.6502, "step": 9828 }, { "epoch": 0.8059441455290802, "grad_norm": 0.34090256690979004, "learning_rate": 9.956142065901424e-06, "loss": 2.6902, "step": 9830 }, { "epoch": 0.8061081219574686, "grad_norm": 0.3638874590396881, "learning_rate": 9.939911864064328e-06, "loss": 2.7211, "step": 9832 }, { "epoch": 0.806272098385857, "grad_norm": 0.3327675461769104, "learning_rate": 9.923693441801974e-06, "loss": 2.6652, "step": 9834 }, { "epoch": 0.8064360748142455, "grad_norm": 0.34956666827201843, "learning_rate": 9.907486803883359e-06, "loss": 2.7232, "step": 9836 }, { "epoch": 0.8066000512426339, "grad_norm": 0.33703866600990295, "learning_rate": 9.891291955074027e-06, "loss": 2.6102, "step": 9838 }, { "epoch": 0.8067640276710223, "grad_norm": 0.34120437502861023, "learning_rate": 9.875108900136009e-06, "loss": 2.6803, "step": 9840 }, { "epoch": 0.8069280040994107, "grad_norm": 0.3307841718196869, "learning_rate": 9.858937643827898e-06, "loss": 2.66, "step": 9842 }, { "epoch": 0.8070919805277992, "grad_norm": 0.34485509991645813, "learning_rate": 9.842778190904828e-06, "loss": 2.6461, "step": 9844 }, { "epoch": 0.8072559569561876, "grad_norm": 0.3399493098258972, "learning_rate": 9.826630546118449e-06, "loss": 2.6222, "step": 9846 }, { "epoch": 0.807419933384576, "grad_norm": 0.3321893811225891, "learning_rate": 9.810494714216906e-06, "loss": 2.6766, "step": 9848 }, { "epoch": 0.8075839098129644, "grad_norm": 0.36160576343536377, "learning_rate": 9.79437069994495e-06, "loss": 2.6517, "step": 9850 }, { "epoch": 0.8077478862413529, "grad_norm": 0.3361883759498596, "learning_rate": 9.77825850804377e-06, "loss": 2.6644, "step": 9852 }, { "epoch": 0.8079118626697412, "grad_norm": 0.33430948853492737, "learning_rate": 9.762158143251138e-06, "loss": 2.6626, "step": 9854 }, { "epoch": 0.8080758390981296, "grad_norm": 0.3511503040790558, "learning_rate": 9.746069610301323e-06, "loss": 2.6169, "step": 9856 }, { "epoch": 0.808239815526518, "grad_norm": 0.3396109342575073, "learning_rate": 9.729992913925135e-06, "loss": 2.6345, "step": 9858 }, { "epoch": 0.8084037919549065, "grad_norm": 0.35154980421066284, "learning_rate": 9.713928058849859e-06, "loss": 2.6742, "step": 9860 }, { "epoch": 0.8085677683832949, "grad_norm": 0.34014129638671875, "learning_rate": 9.697875049799348e-06, "loss": 2.6364, "step": 9862 }, { "epoch": 0.8087317448116833, "grad_norm": 0.34088748693466187, "learning_rate": 9.681833891493957e-06, "loss": 2.6671, "step": 9864 }, { "epoch": 0.8088957212400717, "grad_norm": 0.33685797452926636, "learning_rate": 9.665804588650518e-06, "loss": 2.6597, "step": 9866 }, { "epoch": 0.8090596976684602, "grad_norm": 0.3836219608783722, "learning_rate": 9.649787145982459e-06, "loss": 2.6971, "step": 9868 }, { "epoch": 0.8092236740968486, "grad_norm": 0.340302050113678, "learning_rate": 9.633781568199634e-06, "loss": 2.7069, "step": 9870 }, { "epoch": 0.809387650525237, "grad_norm": 0.362107515335083, "learning_rate": 9.617787860008454e-06, "loss": 2.7141, "step": 9872 }, { "epoch": 0.8095516269536254, "grad_norm": 0.3450830280780792, "learning_rate": 9.601806026111837e-06, "loss": 2.6461, "step": 9874 }, { "epoch": 0.8097156033820139, "grad_norm": 0.3368300497531891, "learning_rate": 9.585836071209214e-06, "loss": 2.6376, "step": 9876 }, { "epoch": 0.8098795798104023, "grad_norm": 0.35574182868003845, "learning_rate": 9.569877999996474e-06, "loss": 2.6371, "step": 9878 }, { "epoch": 0.8100435562387907, "grad_norm": 0.3361555337905884, "learning_rate": 9.553931817166101e-06, "loss": 2.6597, "step": 9880 }, { "epoch": 0.810207532667179, "grad_norm": 0.33037787675857544, "learning_rate": 9.537997527406995e-06, "loss": 2.6666, "step": 9882 }, { "epoch": 0.8103715090955675, "grad_norm": 0.34116220474243164, "learning_rate": 9.522075135404612e-06, "loss": 2.664, "step": 9884 }, { "epoch": 0.8105354855239559, "grad_norm": 0.34324443340301514, "learning_rate": 9.506164645840903e-06, "loss": 2.6976, "step": 9886 }, { "epoch": 0.8106994619523443, "grad_norm": 0.33906570076942444, "learning_rate": 9.490266063394282e-06, "loss": 2.6743, "step": 9888 }, { "epoch": 0.8108634383807327, "grad_norm": 0.3425237536430359, "learning_rate": 9.474379392739712e-06, "loss": 2.6409, "step": 9890 }, { "epoch": 0.8110274148091212, "grad_norm": 0.33542388677597046, "learning_rate": 9.45850463854862e-06, "loss": 2.6528, "step": 9892 }, { "epoch": 0.8111913912375096, "grad_norm": 0.34473469853401184, "learning_rate": 9.442641805488962e-06, "loss": 2.681, "step": 9894 }, { "epoch": 0.811355367665898, "grad_norm": 0.34288427233695984, "learning_rate": 9.426790898225123e-06, "loss": 2.7326, "step": 9896 }, { "epoch": 0.8115193440942865, "grad_norm": 0.3429391384124756, "learning_rate": 9.410951921418076e-06, "loss": 2.6642, "step": 9898 }, { "epoch": 0.8116833205226749, "grad_norm": 0.33814674615859985, "learning_rate": 9.395124879725204e-06, "loss": 2.6718, "step": 9900 }, { "epoch": 0.8118472969510633, "grad_norm": 0.33827948570251465, "learning_rate": 9.379309777800411e-06, "loss": 2.6861, "step": 9902 }, { "epoch": 0.8120112733794517, "grad_norm": 0.35360562801361084, "learning_rate": 9.363506620294105e-06, "loss": 2.5944, "step": 9904 }, { "epoch": 0.8121752498078402, "grad_norm": 0.36014726758003235, "learning_rate": 9.34771541185317e-06, "loss": 2.657, "step": 9906 }, { "epoch": 0.8123392262362286, "grad_norm": 0.3329550325870514, "learning_rate": 9.331936157120952e-06, "loss": 2.6732, "step": 9908 }, { "epoch": 0.812503202664617, "grad_norm": 0.3534879982471466, "learning_rate": 9.316168860737312e-06, "loss": 2.6582, "step": 9910 }, { "epoch": 0.8126671790930053, "grad_norm": 0.3599652647972107, "learning_rate": 9.300413527338609e-06, "loss": 2.6587, "step": 9912 }, { "epoch": 0.8128311555213938, "grad_norm": 0.3527226746082306, "learning_rate": 9.284670161557612e-06, "loss": 2.6586, "step": 9914 }, { "epoch": 0.8129951319497822, "grad_norm": 0.33899617195129395, "learning_rate": 9.268938768023683e-06, "loss": 2.6222, "step": 9916 }, { "epoch": 0.8131591083781706, "grad_norm": 0.35475972294807434, "learning_rate": 9.253219351362551e-06, "loss": 2.6785, "step": 9918 }, { "epoch": 0.813323084806559, "grad_norm": 0.33801156282424927, "learning_rate": 9.2375119161965e-06, "loss": 2.6811, "step": 9920 }, { "epoch": 0.8134870612349475, "grad_norm": 0.3491133749485016, "learning_rate": 9.221816467144251e-06, "loss": 2.6727, "step": 9922 }, { "epoch": 0.8136510376633359, "grad_norm": 0.3517976403236389, "learning_rate": 9.206133008821033e-06, "loss": 2.638, "step": 9924 }, { "epoch": 0.8138150140917243, "grad_norm": 0.35021093487739563, "learning_rate": 9.190461545838508e-06, "loss": 2.6478, "step": 9926 }, { "epoch": 0.8139789905201127, "grad_norm": 0.34959086775779724, "learning_rate": 9.17480208280484e-06, "loss": 2.6197, "step": 9928 }, { "epoch": 0.8141429669485012, "grad_norm": 0.3512830138206482, "learning_rate": 9.159154624324656e-06, "loss": 2.6348, "step": 9930 }, { "epoch": 0.8143069433768896, "grad_norm": 0.3498641550540924, "learning_rate": 9.143519174999065e-06, "loss": 2.648, "step": 9932 }, { "epoch": 0.814470919805278, "grad_norm": 0.3432711958885193, "learning_rate": 9.127895739425635e-06, "loss": 2.6947, "step": 9934 }, { "epoch": 0.8146348962336664, "grad_norm": 0.3347759544849396, "learning_rate": 9.112284322198377e-06, "loss": 2.6821, "step": 9936 }, { "epoch": 0.8147988726620549, "grad_norm": 0.3357333540916443, "learning_rate": 9.09668492790781e-06, "loss": 2.6794, "step": 9938 }, { "epoch": 0.8149628490904433, "grad_norm": 0.3522830903530121, "learning_rate": 9.081097561140895e-06, "loss": 2.6683, "step": 9940 }, { "epoch": 0.8151268255188316, "grad_norm": 0.34263843297958374, "learning_rate": 9.06552222648107e-06, "loss": 2.6603, "step": 9942 }, { "epoch": 0.81529080194722, "grad_norm": 0.3587329089641571, "learning_rate": 9.049958928508196e-06, "loss": 2.6662, "step": 9944 }, { "epoch": 0.8154547783756085, "grad_norm": 0.33674511313438416, "learning_rate": 9.034407671798673e-06, "loss": 2.6188, "step": 9946 }, { "epoch": 0.8156187548039969, "grad_norm": 0.3474772870540619, "learning_rate": 9.018868460925267e-06, "loss": 2.6453, "step": 9948 }, { "epoch": 0.8157827312323853, "grad_norm": 0.34455257654190063, "learning_rate": 9.003341300457263e-06, "loss": 2.6932, "step": 9950 }, { "epoch": 0.8159467076607738, "grad_norm": 0.3453068435192108, "learning_rate": 8.987826194960403e-06, "loss": 2.7217, "step": 9952 }, { "epoch": 0.8161106840891622, "grad_norm": 0.3331321179866791, "learning_rate": 8.972323148996831e-06, "loss": 2.6547, "step": 9954 }, { "epoch": 0.8162746605175506, "grad_norm": 0.33736076951026917, "learning_rate": 8.956832167125206e-06, "loss": 2.6524, "step": 9956 }, { "epoch": 0.816438636945939, "grad_norm": 0.329560250043869, "learning_rate": 8.941353253900608e-06, "loss": 2.6861, "step": 9958 }, { "epoch": 0.8166026133743275, "grad_norm": 0.3507496118545532, "learning_rate": 8.92588641387459e-06, "loss": 2.7091, "step": 9960 }, { "epoch": 0.8167665898027159, "grad_norm": 0.329371839761734, "learning_rate": 8.910431651595097e-06, "loss": 2.6549, "step": 9962 }, { "epoch": 0.8169305662311043, "grad_norm": 0.3291900157928467, "learning_rate": 8.894988971606628e-06, "loss": 2.642, "step": 9964 }, { "epoch": 0.8170945426594927, "grad_norm": 0.33792978525161743, "learning_rate": 8.879558378450015e-06, "loss": 2.7, "step": 9966 }, { "epoch": 0.8172585190878812, "grad_norm": 0.34438803791999817, "learning_rate": 8.864139876662609e-06, "loss": 2.6064, "step": 9968 }, { "epoch": 0.8174224955162696, "grad_norm": 0.3284120559692383, "learning_rate": 8.848733470778175e-06, "loss": 2.6614, "step": 9970 }, { "epoch": 0.8175864719446579, "grad_norm": 0.350264310836792, "learning_rate": 8.83333916532696e-06, "loss": 2.6905, "step": 9972 }, { "epoch": 0.8177504483730463, "grad_norm": 0.3350590467453003, "learning_rate": 8.817956964835578e-06, "loss": 2.6826, "step": 9974 }, { "epoch": 0.8179144248014348, "grad_norm": 0.3608088791370392, "learning_rate": 8.802586873827157e-06, "loss": 2.6529, "step": 9976 }, { "epoch": 0.8180784012298232, "grad_norm": 0.3360646963119507, "learning_rate": 8.787228896821242e-06, "loss": 2.6166, "step": 9978 }, { "epoch": 0.8182423776582116, "grad_norm": 0.3609916865825653, "learning_rate": 8.771883038333772e-06, "loss": 2.6782, "step": 9980 }, { "epoch": 0.8184063540866, "grad_norm": 0.350242555141449, "learning_rate": 8.756549302877216e-06, "loss": 2.656, "step": 9982 }, { "epoch": 0.8185703305149885, "grad_norm": 0.33730074763298035, "learning_rate": 8.74122769496038e-06, "loss": 2.6526, "step": 9984 }, { "epoch": 0.8187343069433769, "grad_norm": 0.3400452435016632, "learning_rate": 8.725918219088558e-06, "loss": 2.6555, "step": 9986 }, { "epoch": 0.8188982833717653, "grad_norm": 0.34636190533638, "learning_rate": 8.71062087976347e-06, "loss": 2.6516, "step": 9988 }, { "epoch": 0.8190622598001537, "grad_norm": 0.3500151038169861, "learning_rate": 8.695335681483274e-06, "loss": 2.6861, "step": 9990 }, { "epoch": 0.8192262362285422, "grad_norm": 0.3363935351371765, "learning_rate": 8.680062628742509e-06, "loss": 2.6424, "step": 9992 }, { "epoch": 0.8193902126569306, "grad_norm": 0.3397931456565857, "learning_rate": 8.664801726032229e-06, "loss": 2.6686, "step": 9994 }, { "epoch": 0.819554189085319, "grad_norm": 0.3449036478996277, "learning_rate": 8.649552977839825e-06, "loss": 2.681, "step": 9996 }, { "epoch": 0.8197181655137074, "grad_norm": 0.3322703242301941, "learning_rate": 8.634316388649177e-06, "loss": 2.7286, "step": 9998 }, { "epoch": 0.8198821419420959, "grad_norm": 0.32783421874046326, "learning_rate": 8.619091962940579e-06, "loss": 2.6478, "step": 10000 }, { "epoch": 0.8200461183704842, "grad_norm": 0.3606761395931244, "learning_rate": 8.603879705190704e-06, "loss": 2.6793, "step": 10002 }, { "epoch": 0.8202100947988726, "grad_norm": 0.3356943130493164, "learning_rate": 8.588679619872703e-06, "loss": 2.6795, "step": 10004 }, { "epoch": 0.820374071227261, "grad_norm": 0.3528063893318176, "learning_rate": 8.573491711456122e-06, "loss": 2.6769, "step": 10006 }, { "epoch": 0.8205380476556495, "grad_norm": 0.3395545780658722, "learning_rate": 8.558315984406945e-06, "loss": 2.6603, "step": 10008 }, { "epoch": 0.8207020240840379, "grad_norm": 0.3434174954891205, "learning_rate": 8.543152443187513e-06, "loss": 2.6689, "step": 10010 }, { "epoch": 0.8208660005124263, "grad_norm": 0.34519821405410767, "learning_rate": 8.528001092256687e-06, "loss": 2.6359, "step": 10012 }, { "epoch": 0.8210299769408148, "grad_norm": 0.35039666295051575, "learning_rate": 8.512861936069643e-06, "loss": 2.6996, "step": 10014 }, { "epoch": 0.8211939533692032, "grad_norm": 0.3205336630344391, "learning_rate": 8.497734979078036e-06, "loss": 2.5712, "step": 10016 }, { "epoch": 0.8213579297975916, "grad_norm": 0.33510705828666687, "learning_rate": 8.482620225729903e-06, "loss": 2.6589, "step": 10018 }, { "epoch": 0.82152190622598, "grad_norm": 0.3264015316963196, "learning_rate": 8.467517680469717e-06, "loss": 2.6254, "step": 10020 }, { "epoch": 0.8216858826543685, "grad_norm": 0.34548842906951904, "learning_rate": 8.452427347738324e-06, "loss": 2.6759, "step": 10022 }, { "epoch": 0.8218498590827569, "grad_norm": 0.3275145888328552, "learning_rate": 8.437349231973007e-06, "loss": 2.655, "step": 10024 }, { "epoch": 0.8220138355111453, "grad_norm": 0.3393157720565796, "learning_rate": 8.422283337607472e-06, "loss": 2.6652, "step": 10026 }, { "epoch": 0.8221778119395337, "grad_norm": 0.3465496301651001, "learning_rate": 8.407229669071765e-06, "loss": 2.6291, "step": 10028 }, { "epoch": 0.8223417883679222, "grad_norm": 0.34284669160842896, "learning_rate": 8.392188230792435e-06, "loss": 2.6952, "step": 10030 }, { "epoch": 0.8225057647963105, "grad_norm": 0.3246166706085205, "learning_rate": 8.377159027192344e-06, "loss": 2.6823, "step": 10032 }, { "epoch": 0.8226697412246989, "grad_norm": 0.32913273572921753, "learning_rate": 8.362142062690803e-06, "loss": 2.6534, "step": 10034 }, { "epoch": 0.8228337176530873, "grad_norm": 0.3397602438926697, "learning_rate": 8.347137341703521e-06, "loss": 2.6276, "step": 10036 }, { "epoch": 0.8229976940814758, "grad_norm": 0.3419578969478607, "learning_rate": 8.33214486864261e-06, "loss": 2.6604, "step": 10038 }, { "epoch": 0.8231616705098642, "grad_norm": 0.3334288001060486, "learning_rate": 8.317164647916542e-06, "loss": 2.6728, "step": 10040 }, { "epoch": 0.8233256469382526, "grad_norm": 0.3433830142021179, "learning_rate": 8.302196683930235e-06, "loss": 2.6611, "step": 10042 }, { "epoch": 0.823489623366641, "grad_norm": 0.3323787450790405, "learning_rate": 8.287240981084987e-06, "loss": 2.6449, "step": 10044 }, { "epoch": 0.8236535997950295, "grad_norm": 0.3221453130245209, "learning_rate": 8.27229754377848e-06, "loss": 2.6368, "step": 10046 }, { "epoch": 0.8238175762234179, "grad_norm": 0.35179656744003296, "learning_rate": 8.257366376404806e-06, "loss": 2.716, "step": 10048 }, { "epoch": 0.8239815526518063, "grad_norm": 0.342302531003952, "learning_rate": 8.242447483354421e-06, "loss": 2.6969, "step": 10050 }, { "epoch": 0.8241455290801947, "grad_norm": 0.34897375106811523, "learning_rate": 8.227540869014206e-06, "loss": 2.7003, "step": 10052 }, { "epoch": 0.8243095055085832, "grad_norm": 0.3634078800678253, "learning_rate": 8.212646537767404e-06, "loss": 2.6961, "step": 10054 }, { "epoch": 0.8244734819369716, "grad_norm": 0.33835893869400024, "learning_rate": 8.197764493993687e-06, "loss": 2.6282, "step": 10056 }, { "epoch": 0.82463745836536, "grad_norm": 0.3589731454849243, "learning_rate": 8.18289474206903e-06, "loss": 2.7185, "step": 10058 }, { "epoch": 0.8248014347937483, "grad_norm": 0.3365156650543213, "learning_rate": 8.168037286365915e-06, "loss": 2.5839, "step": 10060 }, { "epoch": 0.8249654112221368, "grad_norm": 0.3548052906990051, "learning_rate": 8.15319213125309e-06, "loss": 2.6798, "step": 10062 }, { "epoch": 0.8251293876505252, "grad_norm": 0.35138753056526184, "learning_rate": 8.138359281095759e-06, "loss": 2.688, "step": 10064 }, { "epoch": 0.8252933640789136, "grad_norm": 0.34818702936172485, "learning_rate": 8.123538740255498e-06, "loss": 2.6736, "step": 10066 }, { "epoch": 0.8254573405073021, "grad_norm": 0.3458591103553772, "learning_rate": 8.10873051309023e-06, "loss": 2.6278, "step": 10068 }, { "epoch": 0.8256213169356905, "grad_norm": 0.3472747504711151, "learning_rate": 8.093934603954283e-06, "loss": 2.6768, "step": 10070 }, { "epoch": 0.8257852933640789, "grad_norm": 0.3340248167514801, "learning_rate": 8.079151017198367e-06, "loss": 2.6419, "step": 10072 }, { "epoch": 0.8259492697924673, "grad_norm": 0.35043764114379883, "learning_rate": 8.064379757169571e-06, "loss": 2.6365, "step": 10074 }, { "epoch": 0.8261132462208558, "grad_norm": 0.3328492343425751, "learning_rate": 8.049620828211307e-06, "loss": 2.6757, "step": 10076 }, { "epoch": 0.8262772226492442, "grad_norm": 0.34937912225723267, "learning_rate": 8.034874234663463e-06, "loss": 2.6546, "step": 10078 }, { "epoch": 0.8264411990776326, "grad_norm": 0.356227844953537, "learning_rate": 8.020139980862201e-06, "loss": 2.6799, "step": 10080 }, { "epoch": 0.826605175506021, "grad_norm": 0.34615686535835266, "learning_rate": 8.005418071140097e-06, "loss": 2.65, "step": 10082 }, { "epoch": 0.8267691519344095, "grad_norm": 0.34325796365737915, "learning_rate": 7.990708509826105e-06, "loss": 2.5944, "step": 10084 }, { "epoch": 0.8269331283627979, "grad_norm": 0.3419267237186432, "learning_rate": 7.97601130124554e-06, "loss": 2.6375, "step": 10086 }, { "epoch": 0.8270971047911863, "grad_norm": 0.3433661460876465, "learning_rate": 7.961326449720064e-06, "loss": 2.6903, "step": 10088 }, { "epoch": 0.8272610812195746, "grad_norm": 0.32648491859436035, "learning_rate": 7.946653959567735e-06, "loss": 2.6735, "step": 10090 }, { "epoch": 0.8274250576479631, "grad_norm": 0.3363194465637207, "learning_rate": 7.931993835102958e-06, "loss": 2.6413, "step": 10092 }, { "epoch": 0.8275890340763515, "grad_norm": 0.3481307029724121, "learning_rate": 7.917346080636512e-06, "loss": 2.6317, "step": 10094 }, { "epoch": 0.8277530105047399, "grad_norm": 0.3203509449958801, "learning_rate": 7.902710700475546e-06, "loss": 2.6527, "step": 10096 }, { "epoch": 0.8279169869331283, "grad_norm": 0.328740656375885, "learning_rate": 7.88808769892354e-06, "loss": 2.716, "step": 10098 }, { "epoch": 0.8280809633615168, "grad_norm": 0.3279756009578705, "learning_rate": 7.873477080280362e-06, "loss": 2.6902, "step": 10100 }, { "epoch": 0.8282449397899052, "grad_norm": 0.3307093381881714, "learning_rate": 7.858878848842227e-06, "loss": 2.6812, "step": 10102 }, { "epoch": 0.8284089162182936, "grad_norm": 0.3418760597705841, "learning_rate": 7.844293008901721e-06, "loss": 2.693, "step": 10104 }, { "epoch": 0.828572892646682, "grad_norm": 0.3490046262741089, "learning_rate": 7.829719564747751e-06, "loss": 2.6665, "step": 10106 }, { "epoch": 0.8287368690750705, "grad_norm": 0.3372333347797394, "learning_rate": 7.815158520665638e-06, "loss": 2.6465, "step": 10108 }, { "epoch": 0.8289008455034589, "grad_norm": 0.3342621624469757, "learning_rate": 7.800609880936999e-06, "loss": 2.684, "step": 10110 }, { "epoch": 0.8290648219318473, "grad_norm": 0.33143872022628784, "learning_rate": 7.786073649839831e-06, "loss": 2.6733, "step": 10112 }, { "epoch": 0.8292287983602357, "grad_norm": 0.35430461168289185, "learning_rate": 7.771549831648495e-06, "loss": 2.6724, "step": 10114 }, { "epoch": 0.8293927747886242, "grad_norm": 0.33693727850914, "learning_rate": 7.757038430633662e-06, "loss": 2.6547, "step": 10116 }, { "epoch": 0.8295567512170126, "grad_norm": 0.33749669790267944, "learning_rate": 7.74253945106238e-06, "loss": 2.6441, "step": 10118 }, { "epoch": 0.829720727645401, "grad_norm": 0.34288644790649414, "learning_rate": 7.728052897198052e-06, "loss": 2.7163, "step": 10120 }, { "epoch": 0.8298847040737894, "grad_norm": 0.33759576082229614, "learning_rate": 7.713578773300416e-06, "loss": 2.6814, "step": 10122 }, { "epoch": 0.8300486805021778, "grad_norm": 0.360008180141449, "learning_rate": 7.699117083625524e-06, "loss": 2.6735, "step": 10124 }, { "epoch": 0.8302126569305662, "grad_norm": 0.33299127221107483, "learning_rate": 7.684667832425846e-06, "loss": 2.6658, "step": 10126 }, { "epoch": 0.8303766333589546, "grad_norm": 0.3614155650138855, "learning_rate": 7.670231023950119e-06, "loss": 2.6884, "step": 10128 }, { "epoch": 0.8305406097873431, "grad_norm": 0.3424185514450073, "learning_rate": 7.655806662443465e-06, "loss": 2.6917, "step": 10130 }, { "epoch": 0.8307045862157315, "grad_norm": 0.33887729048728943, "learning_rate": 7.64139475214733e-06, "loss": 2.6231, "step": 10132 }, { "epoch": 0.8308685626441199, "grad_norm": 0.34644046425819397, "learning_rate": 7.626995297299511e-06, "loss": 2.6682, "step": 10134 }, { "epoch": 0.8310325390725083, "grad_norm": 0.3414541482925415, "learning_rate": 7.612608302134116e-06, "loss": 2.6723, "step": 10136 }, { "epoch": 0.8311965155008968, "grad_norm": 0.34266263246536255, "learning_rate": 7.598233770881619e-06, "loss": 2.6133, "step": 10138 }, { "epoch": 0.8313604919292852, "grad_norm": 0.33635443449020386, "learning_rate": 7.58387170776883e-06, "loss": 2.6741, "step": 10140 }, { "epoch": 0.8315244683576736, "grad_norm": 0.3483855426311493, "learning_rate": 7.56952211701884e-06, "loss": 2.6941, "step": 10142 }, { "epoch": 0.831688444786062, "grad_norm": 0.3535546064376831, "learning_rate": 7.555185002851156e-06, "loss": 2.6879, "step": 10144 }, { "epoch": 0.8318524212144505, "grad_norm": 0.35101404786109924, "learning_rate": 7.540860369481551e-06, "loss": 2.6776, "step": 10146 }, { "epoch": 0.8320163976428389, "grad_norm": 0.3552359640598297, "learning_rate": 7.526548221122148e-06, "loss": 2.6116, "step": 10148 }, { "epoch": 0.8321803740712272, "grad_norm": 0.35252535343170166, "learning_rate": 7.5122485619814055e-06, "loss": 2.6853, "step": 10150 }, { "epoch": 0.8323443504996156, "grad_norm": 0.34232112765312195, "learning_rate": 7.4979613962641195e-06, "loss": 2.661, "step": 10152 }, { "epoch": 0.8325083269280041, "grad_norm": 0.34504052996635437, "learning_rate": 7.4836867281713575e-06, "loss": 2.7006, "step": 10154 }, { "epoch": 0.8326723033563925, "grad_norm": 0.34139329195022583, "learning_rate": 7.469424561900601e-06, "loss": 2.6761, "step": 10156 }, { "epoch": 0.8328362797847809, "grad_norm": 0.3436908423900604, "learning_rate": 7.455174901645573e-06, "loss": 2.6624, "step": 10158 }, { "epoch": 0.8330002562131693, "grad_norm": 0.3399675786495209, "learning_rate": 7.440937751596361e-06, "loss": 2.6486, "step": 10160 }, { "epoch": 0.8331642326415578, "grad_norm": 0.33405208587646484, "learning_rate": 7.426713115939382e-06, "loss": 2.6356, "step": 10162 }, { "epoch": 0.8333282090699462, "grad_norm": 0.35486406087875366, "learning_rate": 7.412500998857325e-06, "loss": 2.6332, "step": 10164 }, { "epoch": 0.8334921854983346, "grad_norm": 0.3347385823726654, "learning_rate": 7.398301404529251e-06, "loss": 2.6291, "step": 10166 }, { "epoch": 0.833656161926723, "grad_norm": 0.34466326236724854, "learning_rate": 7.38411433713051e-06, "loss": 2.6578, "step": 10168 }, { "epoch": 0.8338201383551115, "grad_norm": 0.32677194476127625, "learning_rate": 7.3699398008327925e-06, "loss": 2.6522, "step": 10170 }, { "epoch": 0.8339841147834999, "grad_norm": 0.3286292254924774, "learning_rate": 7.355777799804053e-06, "loss": 2.6518, "step": 10172 }, { "epoch": 0.8341480912118883, "grad_norm": 0.3278927206993103, "learning_rate": 7.341628338208634e-06, "loss": 2.684, "step": 10174 }, { "epoch": 0.8343120676402767, "grad_norm": 0.33257821202278137, "learning_rate": 7.327491420207122e-06, "loss": 2.6454, "step": 10176 }, { "epoch": 0.8344760440686652, "grad_norm": 0.3482322096824646, "learning_rate": 7.313367049956455e-06, "loss": 2.6986, "step": 10178 }, { "epoch": 0.8346400204970535, "grad_norm": 0.3238534927368164, "learning_rate": 7.299255231609875e-06, "loss": 2.654, "step": 10180 }, { "epoch": 0.8348039969254419, "grad_norm": 0.36573687195777893, "learning_rate": 7.285155969316932e-06, "loss": 2.6541, "step": 10182 }, { "epoch": 0.8349679733538304, "grad_norm": 0.335008442401886, "learning_rate": 7.271069267223463e-06, "loss": 2.6986, "step": 10184 }, { "epoch": 0.8351319497822188, "grad_norm": 0.3391663134098053, "learning_rate": 7.256995129471645e-06, "loss": 2.653, "step": 10186 }, { "epoch": 0.8352959262106072, "grad_norm": 0.3508013188838959, "learning_rate": 7.242933560199949e-06, "loss": 2.6814, "step": 10188 }, { "epoch": 0.8354599026389956, "grad_norm": 0.3220774829387665, "learning_rate": 7.2288845635431145e-06, "loss": 2.6441, "step": 10190 }, { "epoch": 0.8356238790673841, "grad_norm": 0.35174351930618286, "learning_rate": 7.214848143632263e-06, "loss": 2.6576, "step": 10192 }, { "epoch": 0.8357878554957725, "grad_norm": 0.34564927220344543, "learning_rate": 7.200824304594739e-06, "loss": 2.713, "step": 10194 }, { "epoch": 0.8359518319241609, "grad_norm": 0.33946773409843445, "learning_rate": 7.186813050554226e-06, "loss": 2.6337, "step": 10196 }, { "epoch": 0.8361158083525493, "grad_norm": 0.34069371223449707, "learning_rate": 7.172814385630699e-06, "loss": 2.6811, "step": 10198 }, { "epoch": 0.8362797847809378, "grad_norm": 0.3616045415401459, "learning_rate": 7.158828313940452e-06, "loss": 2.6796, "step": 10200 }, { "epoch": 0.8364437612093262, "grad_norm": 0.34944960474967957, "learning_rate": 7.144854839596027e-06, "loss": 2.6917, "step": 10202 }, { "epoch": 0.8366077376377146, "grad_norm": 0.3647206127643585, "learning_rate": 7.130893966706304e-06, "loss": 2.7039, "step": 10204 }, { "epoch": 0.836771714066103, "grad_norm": 0.34225746989250183, "learning_rate": 7.116945699376448e-06, "loss": 2.6764, "step": 10206 }, { "epoch": 0.8369356904944915, "grad_norm": 0.3454033136367798, "learning_rate": 7.1030100417079085e-06, "loss": 2.6581, "step": 10208 }, { "epoch": 0.8370996669228798, "grad_norm": 0.3391875624656677, "learning_rate": 7.0890869977984494e-06, "loss": 2.6823, "step": 10210 }, { "epoch": 0.8372636433512682, "grad_norm": 0.34025901556015015, "learning_rate": 7.075176571742082e-06, "loss": 2.6603, "step": 10212 }, { "epoch": 0.8374276197796566, "grad_norm": 0.3389114737510681, "learning_rate": 7.061278767629148e-06, "loss": 2.6491, "step": 10214 }, { "epoch": 0.8375915962080451, "grad_norm": 0.3778437376022339, "learning_rate": 7.047393589546269e-06, "loss": 2.6886, "step": 10216 }, { "epoch": 0.8377555726364335, "grad_norm": 0.3391709625720978, "learning_rate": 7.033521041576352e-06, "loss": 2.6158, "step": 10218 }, { "epoch": 0.8379195490648219, "grad_norm": 0.3422224819660187, "learning_rate": 7.019661127798555e-06, "loss": 2.6282, "step": 10220 }, { "epoch": 0.8380835254932103, "grad_norm": 0.3342430293560028, "learning_rate": 7.005813852288401e-06, "loss": 2.711, "step": 10222 }, { "epoch": 0.8382475019215988, "grad_norm": 0.36175110936164856, "learning_rate": 6.991979219117617e-06, "loss": 2.6107, "step": 10224 }, { "epoch": 0.8384114783499872, "grad_norm": 0.33473923802375793, "learning_rate": 6.978157232354249e-06, "loss": 2.6845, "step": 10226 }, { "epoch": 0.8385754547783756, "grad_norm": 0.33205145597457886, "learning_rate": 6.964347896062634e-06, "loss": 2.6941, "step": 10228 }, { "epoch": 0.838739431206764, "grad_norm": 0.3306818902492523, "learning_rate": 6.950551214303352e-06, "loss": 2.6307, "step": 10230 }, { "epoch": 0.8389034076351525, "grad_norm": 0.3298625946044922, "learning_rate": 6.936767191133298e-06, "loss": 2.6588, "step": 10232 }, { "epoch": 0.8390673840635409, "grad_norm": 0.335028737783432, "learning_rate": 6.922995830605633e-06, "loss": 2.7251, "step": 10234 }, { "epoch": 0.8392313604919293, "grad_norm": 0.3306805193424225, "learning_rate": 6.909237136769797e-06, "loss": 2.6619, "step": 10236 }, { "epoch": 0.8393953369203178, "grad_norm": 0.335214227437973, "learning_rate": 6.895491113671476e-06, "loss": 2.6403, "step": 10238 }, { "epoch": 0.8395593133487061, "grad_norm": 0.32805973291397095, "learning_rate": 6.881757765352698e-06, "loss": 2.6837, "step": 10240 }, { "epoch": 0.8397232897770945, "grad_norm": 0.3360416889190674, "learning_rate": 6.868037095851687e-06, "loss": 2.6999, "step": 10242 }, { "epoch": 0.8398872662054829, "grad_norm": 0.32662519812583923, "learning_rate": 6.854329109202984e-06, "loss": 2.6577, "step": 10244 }, { "epoch": 0.8400512426338714, "grad_norm": 0.3512989580631256, "learning_rate": 6.840633809437391e-06, "loss": 2.7151, "step": 10246 }, { "epoch": 0.8402152190622598, "grad_norm": 0.3555261790752411, "learning_rate": 6.826951200581994e-06, "loss": 2.6911, "step": 10248 }, { "epoch": 0.8403791954906482, "grad_norm": 0.33757784962654114, "learning_rate": 6.813281286660106e-06, "loss": 2.6898, "step": 10250 }, { "epoch": 0.8405431719190366, "grad_norm": 0.3228794038295746, "learning_rate": 6.7996240716913405e-06, "loss": 2.632, "step": 10252 }, { "epoch": 0.8407071483474251, "grad_norm": 0.3640576899051666, "learning_rate": 6.7859795596915854e-06, "loss": 2.7054, "step": 10254 }, { "epoch": 0.8408711247758135, "grad_norm": 0.33922749757766724, "learning_rate": 6.772347754672947e-06, "loss": 2.7205, "step": 10256 }, { "epoch": 0.8410351012042019, "grad_norm": 0.3382910192012787, "learning_rate": 6.758728660643859e-06, "loss": 2.6362, "step": 10258 }, { "epoch": 0.8411990776325903, "grad_norm": 0.3458372950553894, "learning_rate": 6.745122281608962e-06, "loss": 2.7351, "step": 10260 }, { "epoch": 0.8413630540609788, "grad_norm": 0.33167314529418945, "learning_rate": 6.7315286215691886e-06, "loss": 2.6372, "step": 10262 }, { "epoch": 0.8415270304893672, "grad_norm": 0.34121838212013245, "learning_rate": 6.717947684521719e-06, "loss": 2.61, "step": 10264 }, { "epoch": 0.8416910069177556, "grad_norm": 0.3431921899318695, "learning_rate": 6.70437947446001e-06, "loss": 2.6465, "step": 10266 }, { "epoch": 0.841854983346144, "grad_norm": 0.34275180101394653, "learning_rate": 6.690823995373724e-06, "loss": 2.6733, "step": 10268 }, { "epoch": 0.8420189597745324, "grad_norm": 0.3301771581172943, "learning_rate": 6.677281251248868e-06, "loss": 2.729, "step": 10270 }, { "epoch": 0.8421829362029208, "grad_norm": 0.357749342918396, "learning_rate": 6.663751246067618e-06, "loss": 2.6573, "step": 10272 }, { "epoch": 0.8423469126313092, "grad_norm": 0.32637819647789, "learning_rate": 6.650233983808446e-06, "loss": 2.6124, "step": 10274 }, { "epoch": 0.8425108890596976, "grad_norm": 0.32940346002578735, "learning_rate": 6.636729468446096e-06, "loss": 2.6625, "step": 10276 }, { "epoch": 0.8426748654880861, "grad_norm": 0.34032365679740906, "learning_rate": 6.623237703951501e-06, "loss": 2.6689, "step": 10278 }, { "epoch": 0.8428388419164745, "grad_norm": 0.337433397769928, "learning_rate": 6.609758694291906e-06, "loss": 2.694, "step": 10280 }, { "epoch": 0.8430028183448629, "grad_norm": 0.3188422918319702, "learning_rate": 6.5962924434307735e-06, "loss": 2.7065, "step": 10282 }, { "epoch": 0.8431667947732513, "grad_norm": 0.3274192810058594, "learning_rate": 6.582838955327841e-06, "loss": 2.6506, "step": 10284 }, { "epoch": 0.8433307712016398, "grad_norm": 0.3237343728542328, "learning_rate": 6.569398233939039e-06, "loss": 2.7245, "step": 10286 }, { "epoch": 0.8434947476300282, "grad_norm": 0.34002062678337097, "learning_rate": 6.555970283216622e-06, "loss": 2.6613, "step": 10288 }, { "epoch": 0.8436587240584166, "grad_norm": 0.3255845308303833, "learning_rate": 6.542555107109016e-06, "loss": 2.6925, "step": 10290 }, { "epoch": 0.843822700486805, "grad_norm": 0.3381110727787018, "learning_rate": 6.529152709560937e-06, "loss": 2.6513, "step": 10292 }, { "epoch": 0.8439866769151935, "grad_norm": 0.3413912057876587, "learning_rate": 6.515763094513322e-06, "loss": 2.6676, "step": 10294 }, { "epoch": 0.8441506533435819, "grad_norm": 0.32500338554382324, "learning_rate": 6.502386265903365e-06, "loss": 2.6848, "step": 10296 }, { "epoch": 0.8443146297719702, "grad_norm": 0.3394920527935028, "learning_rate": 6.4890222276644816e-06, "loss": 2.6421, "step": 10298 }, { "epoch": 0.8444786062003588, "grad_norm": 0.3335939645767212, "learning_rate": 6.475670983726334e-06, "loss": 2.6116, "step": 10300 }, { "epoch": 0.8446425826287471, "grad_norm": 0.3229528069496155, "learning_rate": 6.462332538014837e-06, "loss": 2.6614, "step": 10302 }, { "epoch": 0.8448065590571355, "grad_norm": 0.34554025530815125, "learning_rate": 6.449006894452103e-06, "loss": 2.6446, "step": 10304 }, { "epoch": 0.8449705354855239, "grad_norm": 0.3376176655292511, "learning_rate": 6.435694056956548e-06, "loss": 2.658, "step": 10306 }, { "epoch": 0.8451345119139124, "grad_norm": 0.3239997327327728, "learning_rate": 6.422394029442741e-06, "loss": 2.6708, "step": 10308 }, { "epoch": 0.8452984883423008, "grad_norm": 0.3343709707260132, "learning_rate": 6.4091068158215474e-06, "loss": 2.6057, "step": 10310 }, { "epoch": 0.8454624647706892, "grad_norm": 0.3237256407737732, "learning_rate": 6.3958324200000255e-06, "loss": 2.6984, "step": 10312 }, { "epoch": 0.8456264411990776, "grad_norm": 0.32227352261543274, "learning_rate": 6.382570845881503e-06, "loss": 2.6428, "step": 10314 }, { "epoch": 0.8457904176274661, "grad_norm": 0.34628531336784363, "learning_rate": 6.369322097365488e-06, "loss": 2.6248, "step": 10316 }, { "epoch": 0.8459543940558545, "grad_norm": 0.32664191722869873, "learning_rate": 6.356086178347753e-06, "loss": 2.6905, "step": 10318 }, { "epoch": 0.8461183704842429, "grad_norm": 0.3484685719013214, "learning_rate": 6.342863092720292e-06, "loss": 2.669, "step": 10320 }, { "epoch": 0.8462823469126313, "grad_norm": 0.35194677114486694, "learning_rate": 6.329652844371325e-06, "loss": 2.6446, "step": 10322 }, { "epoch": 0.8464463233410198, "grad_norm": 0.3379284739494324, "learning_rate": 6.316455437185298e-06, "loss": 2.6632, "step": 10324 }, { "epoch": 0.8466102997694082, "grad_norm": 0.3462430238723755, "learning_rate": 6.3032708750428605e-06, "loss": 2.7172, "step": 10326 }, { "epoch": 0.8467742761977965, "grad_norm": 0.32815051078796387, "learning_rate": 6.290099161820911e-06, "loss": 2.6365, "step": 10328 }, { "epoch": 0.8469382526261849, "grad_norm": 0.3664245903491974, "learning_rate": 6.276940301392565e-06, "loss": 2.6752, "step": 10330 }, { "epoch": 0.8471022290545734, "grad_norm": 0.33695438504219055, "learning_rate": 6.263794297627163e-06, "loss": 2.6557, "step": 10332 }, { "epoch": 0.8472662054829618, "grad_norm": 0.3390887677669525, "learning_rate": 6.250661154390219e-06, "loss": 2.6847, "step": 10334 }, { "epoch": 0.8474301819113502, "grad_norm": 0.33502426743507385, "learning_rate": 6.237540875543557e-06, "loss": 2.6681, "step": 10336 }, { "epoch": 0.8475941583397386, "grad_norm": 0.3481208384037018, "learning_rate": 6.224433464945123e-06, "loss": 2.6607, "step": 10338 }, { "epoch": 0.8477581347681271, "grad_norm": 0.33429208397865295, "learning_rate": 6.211338926449128e-06, "loss": 2.6375, "step": 10340 }, { "epoch": 0.8479221111965155, "grad_norm": 0.34141895174980164, "learning_rate": 6.19825726390601e-06, "loss": 2.6728, "step": 10342 }, { "epoch": 0.8480860876249039, "grad_norm": 0.3465202748775482, "learning_rate": 6.185188481162374e-06, "loss": 2.6608, "step": 10344 }, { "epoch": 0.8482500640532923, "grad_norm": 0.3486677408218384, "learning_rate": 6.17213258206108e-06, "loss": 2.6338, "step": 10346 }, { "epoch": 0.8484140404816808, "grad_norm": 0.34063851833343506, "learning_rate": 6.159089570441184e-06, "loss": 2.6721, "step": 10348 }, { "epoch": 0.8485780169100692, "grad_norm": 0.3253879249095917, "learning_rate": 6.146059450137958e-06, "loss": 2.6752, "step": 10350 }, { "epoch": 0.8487419933384576, "grad_norm": 0.34873634576797485, "learning_rate": 6.133042224982849e-06, "loss": 2.5339, "step": 10352 }, { "epoch": 0.8489059697668461, "grad_norm": 0.3279608488082886, "learning_rate": 6.120037898803588e-06, "loss": 2.6381, "step": 10354 }, { "epoch": 0.8490699461952345, "grad_norm": 0.3308979570865631, "learning_rate": 6.1070464754240334e-06, "loss": 2.6616, "step": 10356 }, { "epoch": 0.8492339226236228, "grad_norm": 0.3344348073005676, "learning_rate": 6.0940679586642846e-06, "loss": 2.6912, "step": 10358 }, { "epoch": 0.8493978990520112, "grad_norm": 0.34096384048461914, "learning_rate": 6.0811023523406565e-06, "loss": 2.6387, "step": 10360 }, { "epoch": 0.8495618754803997, "grad_norm": 0.38651856780052185, "learning_rate": 6.068149660265654e-06, "loss": 2.6596, "step": 10362 }, { "epoch": 0.8497258519087881, "grad_norm": 0.3362831771373749, "learning_rate": 6.055209886247975e-06, "loss": 2.674, "step": 10364 }, { "epoch": 0.8498898283371765, "grad_norm": 0.3238201439380646, "learning_rate": 6.042283034092538e-06, "loss": 2.6572, "step": 10366 }, { "epoch": 0.8500538047655649, "grad_norm": 0.3375564217567444, "learning_rate": 6.0293691076004465e-06, "loss": 2.6981, "step": 10368 }, { "epoch": 0.8502177811939534, "grad_norm": 0.3450925052165985, "learning_rate": 6.016468110569018e-06, "loss": 2.6392, "step": 10370 }, { "epoch": 0.8503817576223418, "grad_norm": 0.3320460915565491, "learning_rate": 6.003580046791768e-06, "loss": 2.6454, "step": 10372 }, { "epoch": 0.8505457340507302, "grad_norm": 0.3344566524028778, "learning_rate": 5.990704920058377e-06, "loss": 2.649, "step": 10374 }, { "epoch": 0.8507097104791186, "grad_norm": 0.3382944166660309, "learning_rate": 5.977842734154765e-06, "loss": 2.6502, "step": 10376 }, { "epoch": 0.8508736869075071, "grad_norm": 0.32182577252388, "learning_rate": 5.964993492863019e-06, "loss": 2.648, "step": 10378 }, { "epoch": 0.8510376633358955, "grad_norm": 0.3318832814693451, "learning_rate": 5.952157199961439e-06, "loss": 2.6941, "step": 10380 }, { "epoch": 0.8512016397642839, "grad_norm": 0.31887030601501465, "learning_rate": 5.9393338592244825e-06, "loss": 2.6522, "step": 10382 }, { "epoch": 0.8513656161926723, "grad_norm": 0.33194801211357117, "learning_rate": 5.926523474422857e-06, "loss": 2.6395, "step": 10384 }, { "epoch": 0.8515295926210608, "grad_norm": 0.34195607900619507, "learning_rate": 5.913726049323398e-06, "loss": 2.6626, "step": 10386 }, { "epoch": 0.8516935690494492, "grad_norm": 0.3371245265007019, "learning_rate": 5.900941587689168e-06, "loss": 2.6325, "step": 10388 }, { "epoch": 0.8518575454778375, "grad_norm": 0.3308737874031067, "learning_rate": 5.888170093279421e-06, "loss": 2.6816, "step": 10390 }, { "epoch": 0.8520215219062259, "grad_norm": 0.3237842619419098, "learning_rate": 5.875411569849565e-06, "loss": 2.6616, "step": 10392 }, { "epoch": 0.8521854983346144, "grad_norm": 0.34219205379486084, "learning_rate": 5.862666021151214e-06, "loss": 2.684, "step": 10394 }, { "epoch": 0.8523494747630028, "grad_norm": 0.3327094614505768, "learning_rate": 5.849933450932177e-06, "loss": 2.6046, "step": 10396 }, { "epoch": 0.8525134511913912, "grad_norm": 0.33332833647727966, "learning_rate": 5.837213862936447e-06, "loss": 2.6625, "step": 10398 }, { "epoch": 0.8526774276197796, "grad_norm": 0.3409523069858551, "learning_rate": 5.82450726090415e-06, "loss": 2.6375, "step": 10400 }, { "epoch": 0.8528414040481681, "grad_norm": 0.3387695550918579, "learning_rate": 5.811813648571679e-06, "loss": 2.7021, "step": 10402 }, { "epoch": 0.8530053804765565, "grad_norm": 0.3380659222602844, "learning_rate": 5.799133029671527e-06, "loss": 2.682, "step": 10404 }, { "epoch": 0.8531693569049449, "grad_norm": 0.32091769576072693, "learning_rate": 5.786465407932417e-06, "loss": 2.7081, "step": 10406 }, { "epoch": 0.8533333333333334, "grad_norm": 0.3436664342880249, "learning_rate": 5.7738107870792255e-06, "loss": 2.6844, "step": 10408 }, { "epoch": 0.8534973097617218, "grad_norm": 0.33804869651794434, "learning_rate": 5.76116917083303e-06, "loss": 2.7, "step": 10410 }, { "epoch": 0.8536612861901102, "grad_norm": 0.3222094476222992, "learning_rate": 5.748540562911048e-06, "loss": 2.6341, "step": 10412 }, { "epoch": 0.8538252626184986, "grad_norm": 0.345938116312027, "learning_rate": 5.735924967026696e-06, "loss": 2.6474, "step": 10414 }, { "epoch": 0.8539892390468871, "grad_norm": 0.327473908662796, "learning_rate": 5.723322386889574e-06, "loss": 2.6513, "step": 10416 }, { "epoch": 0.8541532154752755, "grad_norm": 0.32559624314308167, "learning_rate": 5.710732826205412e-06, "loss": 2.6481, "step": 10418 }, { "epoch": 0.8543171919036638, "grad_norm": 0.33021292090415955, "learning_rate": 5.698156288676176e-06, "loss": 2.722, "step": 10420 }, { "epoch": 0.8544811683320522, "grad_norm": 0.32161247730255127, "learning_rate": 5.685592777999943e-06, "loss": 2.6617, "step": 10422 }, { "epoch": 0.8546451447604407, "grad_norm": 0.3295929729938507, "learning_rate": 5.673042297870995e-06, "loss": 2.6485, "step": 10424 }, { "epoch": 0.8548091211888291, "grad_norm": 0.34970468282699585, "learning_rate": 5.660504851979764e-06, "loss": 2.6689, "step": 10426 }, { "epoch": 0.8549730976172175, "grad_norm": 0.3305900990962982, "learning_rate": 5.647980444012874e-06, "loss": 2.7092, "step": 10428 }, { "epoch": 0.8551370740456059, "grad_norm": 0.35563620924949646, "learning_rate": 5.635469077653061e-06, "loss": 2.623, "step": 10430 }, { "epoch": 0.8553010504739944, "grad_norm": 0.3382416367530823, "learning_rate": 5.622970756579304e-06, "loss": 2.6243, "step": 10432 }, { "epoch": 0.8554650269023828, "grad_norm": 0.3244633674621582, "learning_rate": 5.610485484466682e-06, "loss": 2.6241, "step": 10434 }, { "epoch": 0.8556290033307712, "grad_norm": 0.3333638906478882, "learning_rate": 5.5980132649864625e-06, "loss": 2.6703, "step": 10436 }, { "epoch": 0.8557929797591596, "grad_norm": 0.3213784992694855, "learning_rate": 5.585554101806085e-06, "loss": 2.6672, "step": 10438 }, { "epoch": 0.8559569561875481, "grad_norm": 0.3316629230976105, "learning_rate": 5.573107998589117e-06, "loss": 2.6401, "step": 10440 }, { "epoch": 0.8561209326159365, "grad_norm": 0.32977238297462463, "learning_rate": 5.56067495899531e-06, "loss": 2.7614, "step": 10442 }, { "epoch": 0.8562849090443249, "grad_norm": 0.3326664865016937, "learning_rate": 5.548254986680584e-06, "loss": 2.7211, "step": 10444 }, { "epoch": 0.8564488854727133, "grad_norm": 0.3291242718696594, "learning_rate": 5.535848085297002e-06, "loss": 2.6509, "step": 10446 }, { "epoch": 0.8566128619011018, "grad_norm": 0.3376324772834778, "learning_rate": 5.523454258492755e-06, "loss": 2.6712, "step": 10448 }, { "epoch": 0.8567768383294901, "grad_norm": 0.3199458718299866, "learning_rate": 5.511073509912262e-06, "loss": 2.6672, "step": 10450 }, { "epoch": 0.8569408147578785, "grad_norm": 0.3401946425437927, "learning_rate": 5.498705843196017e-06, "loss": 2.622, "step": 10452 }, { "epoch": 0.8571047911862669, "grad_norm": 0.31917446851730347, "learning_rate": 5.486351261980727e-06, "loss": 2.6907, "step": 10454 }, { "epoch": 0.8572687676146554, "grad_norm": 0.33862030506134033, "learning_rate": 5.474009769899207e-06, "loss": 2.6784, "step": 10456 }, { "epoch": 0.8574327440430438, "grad_norm": 0.3215498626232147, "learning_rate": 5.461681370580474e-06, "loss": 2.6675, "step": 10458 }, { "epoch": 0.8575967204714322, "grad_norm": 0.34415945410728455, "learning_rate": 5.449366067649631e-06, "loss": 2.606, "step": 10460 }, { "epoch": 0.8577606968998206, "grad_norm": 0.31893390417099, "learning_rate": 5.437063864727982e-06, "loss": 2.6547, "step": 10462 }, { "epoch": 0.8579246733282091, "grad_norm": 0.32835182547569275, "learning_rate": 5.424774765432961e-06, "loss": 2.703, "step": 10464 }, { "epoch": 0.8580886497565975, "grad_norm": 0.34637555480003357, "learning_rate": 5.412498773378133e-06, "loss": 2.7045, "step": 10466 }, { "epoch": 0.8582526261849859, "grad_norm": 0.33090728521347046, "learning_rate": 5.40023589217325e-06, "loss": 2.6907, "step": 10468 }, { "epoch": 0.8584166026133744, "grad_norm": 0.344991534948349, "learning_rate": 5.387986125424166e-06, "loss": 2.6763, "step": 10470 }, { "epoch": 0.8585805790417628, "grad_norm": 0.33677831292152405, "learning_rate": 5.3757494767329e-06, "loss": 2.6974, "step": 10472 }, { "epoch": 0.8587445554701512, "grad_norm": 0.3438469469547272, "learning_rate": 5.3635259496976066e-06, "loss": 2.6752, "step": 10474 }, { "epoch": 0.8589085318985396, "grad_norm": 0.3369773328304291, "learning_rate": 5.3513155479126e-06, "loss": 2.6175, "step": 10476 }, { "epoch": 0.859072508326928, "grad_norm": 0.3290081322193146, "learning_rate": 5.339118274968297e-06, "loss": 2.6679, "step": 10478 }, { "epoch": 0.8592364847553164, "grad_norm": 0.3345021605491638, "learning_rate": 5.326934134451295e-06, "loss": 2.6691, "step": 10480 }, { "epoch": 0.8594004611837048, "grad_norm": 0.33626729249954224, "learning_rate": 5.3147631299443e-06, "loss": 2.6766, "step": 10482 }, { "epoch": 0.8595644376120932, "grad_norm": 0.34413790702819824, "learning_rate": 5.302605265026172e-06, "loss": 2.7208, "step": 10484 }, { "epoch": 0.8597284140404817, "grad_norm": 0.3380412757396698, "learning_rate": 5.290460543271919e-06, "loss": 2.6281, "step": 10486 }, { "epoch": 0.8598923904688701, "grad_norm": 0.36027809977531433, "learning_rate": 5.278328968252644e-06, "loss": 2.6884, "step": 10488 }, { "epoch": 0.8600563668972585, "grad_norm": 0.332934707403183, "learning_rate": 5.266210543535611e-06, "loss": 2.6548, "step": 10490 }, { "epoch": 0.8602203433256469, "grad_norm": 0.3373337388038635, "learning_rate": 5.254105272684223e-06, "loss": 2.6316, "step": 10492 }, { "epoch": 0.8603843197540354, "grad_norm": 0.35242360830307007, "learning_rate": 5.242013159258014e-06, "loss": 2.6844, "step": 10494 }, { "epoch": 0.8605482961824238, "grad_norm": 0.34526142477989197, "learning_rate": 5.229934206812609e-06, "loss": 2.679, "step": 10496 }, { "epoch": 0.8607122726108122, "grad_norm": 0.3308252990245819, "learning_rate": 5.217868418899835e-06, "loss": 2.6533, "step": 10498 }, { "epoch": 0.8608762490392006, "grad_norm": 0.3687964677810669, "learning_rate": 5.205815799067587e-06, "loss": 2.6727, "step": 10500 }, { "epoch": 0.8610402254675891, "grad_norm": 0.3394107520580292, "learning_rate": 5.193776350859908e-06, "loss": 2.6684, "step": 10502 }, { "epoch": 0.8612042018959775, "grad_norm": 0.332687646150589, "learning_rate": 5.181750077816988e-06, "loss": 2.6927, "step": 10504 }, { "epoch": 0.8613681783243659, "grad_norm": 0.3407759964466095, "learning_rate": 5.169736983475104e-06, "loss": 2.6486, "step": 10506 }, { "epoch": 0.8615321547527542, "grad_norm": 0.334994912147522, "learning_rate": 5.157737071366681e-06, "loss": 2.6705, "step": 10508 }, { "epoch": 0.8616961311811427, "grad_norm": 0.34132835268974304, "learning_rate": 5.145750345020273e-06, "loss": 2.7029, "step": 10510 }, { "epoch": 0.8618601076095311, "grad_norm": 0.3304557502269745, "learning_rate": 5.13377680796055e-06, "loss": 2.6627, "step": 10512 }, { "epoch": 0.8620240840379195, "grad_norm": 0.32373252511024475, "learning_rate": 5.121816463708285e-06, "loss": 2.6613, "step": 10514 }, { "epoch": 0.8621880604663079, "grad_norm": 0.3293614685535431, "learning_rate": 5.1098693157804135e-06, "loss": 2.6534, "step": 10516 }, { "epoch": 0.8623520368946964, "grad_norm": 0.33072492480278015, "learning_rate": 5.097935367689949e-06, "loss": 2.6383, "step": 10518 }, { "epoch": 0.8625160133230848, "grad_norm": 0.3250677287578583, "learning_rate": 5.08601462294605e-06, "loss": 2.64, "step": 10520 }, { "epoch": 0.8626799897514732, "grad_norm": 0.3213002383708954, "learning_rate": 5.074107085053975e-06, "loss": 2.6391, "step": 10522 }, { "epoch": 0.8628439661798617, "grad_norm": 0.32554301619529724, "learning_rate": 5.062212757515122e-06, "loss": 2.6693, "step": 10524 }, { "epoch": 0.8630079426082501, "grad_norm": 0.3247552514076233, "learning_rate": 5.050331643826972e-06, "loss": 2.6856, "step": 10526 }, { "epoch": 0.8631719190366385, "grad_norm": 0.33183425664901733, "learning_rate": 5.038463747483152e-06, "loss": 2.6228, "step": 10528 }, { "epoch": 0.8633358954650269, "grad_norm": 0.3335568308830261, "learning_rate": 5.026609071973387e-06, "loss": 2.6212, "step": 10530 }, { "epoch": 0.8634998718934154, "grad_norm": 0.34651485085487366, "learning_rate": 5.014767620783495e-06, "loss": 2.6793, "step": 10532 }, { "epoch": 0.8636638483218038, "grad_norm": 0.33603301644325256, "learning_rate": 5.0029393973954675e-06, "loss": 2.6513, "step": 10534 }, { "epoch": 0.8638278247501922, "grad_norm": 0.329553484916687, "learning_rate": 4.991124405287334e-06, "loss": 2.6494, "step": 10536 }, { "epoch": 0.8639918011785805, "grad_norm": 0.31472229957580566, "learning_rate": 4.9793226479332715e-06, "loss": 2.697, "step": 10538 }, { "epoch": 0.864155777606969, "grad_norm": 0.34090685844421387, "learning_rate": 4.967534128803564e-06, "loss": 2.6608, "step": 10540 }, { "epoch": 0.8643197540353574, "grad_norm": 0.31787538528442383, "learning_rate": 4.955758851364606e-06, "loss": 2.6843, "step": 10542 }, { "epoch": 0.8644837304637458, "grad_norm": 0.3270115852355957, "learning_rate": 4.943996819078861e-06, "loss": 2.6486, "step": 10544 }, { "epoch": 0.8646477068921342, "grad_norm": 0.3245372772216797, "learning_rate": 4.932248035404968e-06, "loss": 2.6572, "step": 10546 }, { "epoch": 0.8648116833205227, "grad_norm": 0.3437480032444, "learning_rate": 4.9205125037975974e-06, "loss": 2.6488, "step": 10548 }, { "epoch": 0.8649756597489111, "grad_norm": 0.3401133120059967, "learning_rate": 4.90879022770756e-06, "loss": 2.6762, "step": 10550 }, { "epoch": 0.8651396361772995, "grad_norm": 0.32351604104042053, "learning_rate": 4.897081210581783e-06, "loss": 2.6952, "step": 10552 }, { "epoch": 0.8653036126056879, "grad_norm": 0.3311297297477722, "learning_rate": 4.88538545586325e-06, "loss": 2.6462, "step": 10554 }, { "epoch": 0.8654675890340764, "grad_norm": 0.3420843183994293, "learning_rate": 4.873702966991078e-06, "loss": 2.6729, "step": 10556 }, { "epoch": 0.8656315654624648, "grad_norm": 0.3342722952365875, "learning_rate": 4.86203374740048e-06, "loss": 2.6541, "step": 10558 }, { "epoch": 0.8657955418908532, "grad_norm": 0.34238192439079285, "learning_rate": 4.850377800522771e-06, "loss": 2.698, "step": 10560 }, { "epoch": 0.8659595183192416, "grad_norm": 0.33533889055252075, "learning_rate": 4.838735129785326e-06, "loss": 2.6663, "step": 10562 }, { "epoch": 0.8661234947476301, "grad_norm": 0.3264818489551544, "learning_rate": 4.827105738611682e-06, "loss": 2.6903, "step": 10564 }, { "epoch": 0.8662874711760185, "grad_norm": 0.3580157160758972, "learning_rate": 4.8154896304214046e-06, "loss": 2.6715, "step": 10566 }, { "epoch": 0.8664514476044068, "grad_norm": 0.340491384267807, "learning_rate": 4.803886808630198e-06, "loss": 2.6333, "step": 10568 }, { "epoch": 0.8666154240327952, "grad_norm": 0.3309878408908844, "learning_rate": 4.7922972766498374e-06, "loss": 2.6992, "step": 10570 }, { "epoch": 0.8667794004611837, "grad_norm": 0.34904196858406067, "learning_rate": 4.780721037888214e-06, "loss": 2.6281, "step": 10572 }, { "epoch": 0.8669433768895721, "grad_norm": 0.32914790511131287, "learning_rate": 4.769158095749271e-06, "loss": 2.6095, "step": 10574 }, { "epoch": 0.8671073533179605, "grad_norm": 0.3340021073818207, "learning_rate": 4.7576084536330725e-06, "loss": 2.6764, "step": 10576 }, { "epoch": 0.8672713297463489, "grad_norm": 0.3312329649925232, "learning_rate": 4.746072114935774e-06, "loss": 2.6681, "step": 10578 }, { "epoch": 0.8674353061747374, "grad_norm": 0.3388982117176056, "learning_rate": 4.734549083049577e-06, "loss": 2.642, "step": 10580 }, { "epoch": 0.8675992826031258, "grad_norm": 0.320384681224823, "learning_rate": 4.723039361362852e-06, "loss": 2.6109, "step": 10582 }, { "epoch": 0.8677632590315142, "grad_norm": 0.33212751150131226, "learning_rate": 4.711542953259962e-06, "loss": 2.6626, "step": 10584 }, { "epoch": 0.8679272354599027, "grad_norm": 0.3205034136772156, "learning_rate": 4.70005986212142e-06, "loss": 2.6465, "step": 10586 }, { "epoch": 0.8680912118882911, "grad_norm": 0.32602164149284363, "learning_rate": 4.688590091323791e-06, "loss": 2.6622, "step": 10588 }, { "epoch": 0.8682551883166795, "grad_norm": 0.34370824694633484, "learning_rate": 4.677133644239756e-06, "loss": 2.6931, "step": 10590 }, { "epoch": 0.8684191647450679, "grad_norm": 0.3423410654067993, "learning_rate": 4.6656905242380315e-06, "loss": 2.6645, "step": 10592 }, { "epoch": 0.8685831411734564, "grad_norm": 0.32793718576431274, "learning_rate": 4.654260734683441e-06, "loss": 2.6953, "step": 10594 }, { "epoch": 0.8687471176018448, "grad_norm": 0.3326193690299988, "learning_rate": 4.642844278936903e-06, "loss": 2.662, "step": 10596 }, { "epoch": 0.8689110940302331, "grad_norm": 0.32330694794654846, "learning_rate": 4.6314411603553875e-06, "loss": 2.6902, "step": 10598 }, { "epoch": 0.8690750704586215, "grad_norm": 0.3292381167411804, "learning_rate": 4.620051382291973e-06, "loss": 2.6614, "step": 10600 }, { "epoch": 0.86923904688701, "grad_norm": 0.3296934962272644, "learning_rate": 4.60867494809577e-06, "loss": 2.7171, "step": 10602 }, { "epoch": 0.8694030233153984, "grad_norm": 0.33611395955085754, "learning_rate": 4.597311861112008e-06, "loss": 2.6749, "step": 10604 }, { "epoch": 0.8695669997437868, "grad_norm": 0.3237344026565552, "learning_rate": 4.585962124681975e-06, "loss": 2.6436, "step": 10606 }, { "epoch": 0.8697309761721752, "grad_norm": 0.3220781981945038, "learning_rate": 4.574625742143035e-06, "loss": 2.6599, "step": 10608 }, { "epoch": 0.8698949526005637, "grad_norm": 0.33701395988464355, "learning_rate": 4.56330271682861e-06, "loss": 2.6248, "step": 10610 }, { "epoch": 0.8700589290289521, "grad_norm": 0.33247870206832886, "learning_rate": 4.551993052068237e-06, "loss": 2.6284, "step": 10612 }, { "epoch": 0.8702229054573405, "grad_norm": 0.32626116275787354, "learning_rate": 4.540696751187473e-06, "loss": 2.6595, "step": 10614 }, { "epoch": 0.8703868818857289, "grad_norm": 0.3327313959598541, "learning_rate": 4.529413817507971e-06, "loss": 2.6187, "step": 10616 }, { "epoch": 0.8705508583141174, "grad_norm": 0.3243194818496704, "learning_rate": 4.518144254347473e-06, "loss": 2.644, "step": 10618 }, { "epoch": 0.8707148347425058, "grad_norm": 0.32434871792793274, "learning_rate": 4.506888065019738e-06, "loss": 2.6614, "step": 10620 }, { "epoch": 0.8708788111708942, "grad_norm": 0.3304283022880554, "learning_rate": 4.495645252834635e-06, "loss": 2.656, "step": 10622 }, { "epoch": 0.8710427875992826, "grad_norm": 0.3269564211368561, "learning_rate": 4.484415821098087e-06, "loss": 2.6833, "step": 10624 }, { "epoch": 0.871206764027671, "grad_norm": 0.32656538486480713, "learning_rate": 4.473199773112091e-06, "loss": 2.661, "step": 10626 }, { "epoch": 0.8713707404560594, "grad_norm": 0.322100430727005, "learning_rate": 4.46199711217467e-06, "loss": 2.6734, "step": 10628 }, { "epoch": 0.8715347168844478, "grad_norm": 0.32710719108581543, "learning_rate": 4.450807841579979e-06, "loss": 2.7216, "step": 10630 }, { "epoch": 0.8716986933128362, "grad_norm": 0.333551824092865, "learning_rate": 4.439631964618168e-06, "loss": 2.6899, "step": 10632 }, { "epoch": 0.8718626697412247, "grad_norm": 0.3152640759944916, "learning_rate": 4.428469484575493e-06, "loss": 2.6643, "step": 10634 }, { "epoch": 0.8720266461696131, "grad_norm": 0.3360022008419037, "learning_rate": 4.417320404734243e-06, "loss": 2.6348, "step": 10636 }, { "epoch": 0.8721906225980015, "grad_norm": 0.3313817083835602, "learning_rate": 4.406184728372798e-06, "loss": 2.6035, "step": 10638 }, { "epoch": 0.87235459902639, "grad_norm": 0.32090920209884644, "learning_rate": 4.395062458765553e-06, "loss": 2.6566, "step": 10640 }, { "epoch": 0.8725185754547784, "grad_norm": 0.33037859201431274, "learning_rate": 4.3839535991829996e-06, "loss": 2.6454, "step": 10642 }, { "epoch": 0.8726825518831668, "grad_norm": 0.3234282433986664, "learning_rate": 4.372858152891662e-06, "loss": 2.6975, "step": 10644 }, { "epoch": 0.8728465283115552, "grad_norm": 0.32501181960105896, "learning_rate": 4.361776123154143e-06, "loss": 2.6137, "step": 10646 }, { "epoch": 0.8730105047399437, "grad_norm": 0.34077832102775574, "learning_rate": 4.350707513229091e-06, "loss": 2.6421, "step": 10648 }, { "epoch": 0.8731744811683321, "grad_norm": 0.3236035406589508, "learning_rate": 4.339652326371185e-06, "loss": 2.6522, "step": 10650 }, { "epoch": 0.8733384575967205, "grad_norm": 0.32151779532432556, "learning_rate": 4.328610565831181e-06, "loss": 2.6704, "step": 10652 }, { "epoch": 0.8735024340251089, "grad_norm": 0.32704460620880127, "learning_rate": 4.3175822348558934e-06, "loss": 2.6487, "step": 10654 }, { "epoch": 0.8736664104534974, "grad_norm": 0.32412615418434143, "learning_rate": 4.3065673366881834e-06, "loss": 2.6693, "step": 10656 }, { "epoch": 0.8738303868818857, "grad_norm": 0.3246254622936249, "learning_rate": 4.295565874566926e-06, "loss": 2.6176, "step": 10658 }, { "epoch": 0.8739943633102741, "grad_norm": 0.3276645541191101, "learning_rate": 4.2845778517271115e-06, "loss": 2.616, "step": 10660 }, { "epoch": 0.8741583397386625, "grad_norm": 0.3321981728076935, "learning_rate": 4.273603271399712e-06, "loss": 2.6534, "step": 10662 }, { "epoch": 0.874322316167051, "grad_norm": 0.32941991090774536, "learning_rate": 4.2626421368117956e-06, "loss": 2.5966, "step": 10664 }, { "epoch": 0.8744862925954394, "grad_norm": 0.3411455750465393, "learning_rate": 4.2516944511864554e-06, "loss": 2.6573, "step": 10666 }, { "epoch": 0.8746502690238278, "grad_norm": 0.3415534496307373, "learning_rate": 4.240760217742823e-06, "loss": 2.6598, "step": 10668 }, { "epoch": 0.8748142454522162, "grad_norm": 0.320156067609787, "learning_rate": 4.229839439696093e-06, "loss": 2.669, "step": 10670 }, { "epoch": 0.8749782218806047, "grad_norm": 0.3234598636627197, "learning_rate": 4.21893212025749e-06, "loss": 2.6356, "step": 10672 }, { "epoch": 0.8751421983089931, "grad_norm": 0.3388175070285797, "learning_rate": 4.208038262634295e-06, "loss": 2.6433, "step": 10674 }, { "epoch": 0.8753061747373815, "grad_norm": 0.3253480792045593, "learning_rate": 4.1971578700297996e-06, "loss": 2.6821, "step": 10676 }, { "epoch": 0.8754701511657699, "grad_norm": 0.3269323408603668, "learning_rate": 4.18629094564339e-06, "loss": 2.6294, "step": 10678 }, { "epoch": 0.8756341275941584, "grad_norm": 0.329959511756897, "learning_rate": 4.175437492670431e-06, "loss": 2.6965, "step": 10680 }, { "epoch": 0.8757981040225468, "grad_norm": 0.3268936574459076, "learning_rate": 4.164597514302365e-06, "loss": 2.7049, "step": 10682 }, { "epoch": 0.8759620804509352, "grad_norm": 0.31825578212738037, "learning_rate": 4.153771013726665e-06, "loss": 2.6262, "step": 10684 }, { "epoch": 0.8761260568793235, "grad_norm": 0.3271213173866272, "learning_rate": 4.14295799412684e-06, "loss": 2.6666, "step": 10686 }, { "epoch": 0.876290033307712, "grad_norm": 0.3360469341278076, "learning_rate": 4.132158458682422e-06, "loss": 2.6606, "step": 10688 }, { "epoch": 0.8764540097361004, "grad_norm": 0.32281503081321716, "learning_rate": 4.1213724105690026e-06, "loss": 2.6778, "step": 10690 }, { "epoch": 0.8766179861644888, "grad_norm": 0.33436885476112366, "learning_rate": 4.110599852958181e-06, "loss": 2.6336, "step": 10692 }, { "epoch": 0.8767819625928773, "grad_norm": 0.33220407366752625, "learning_rate": 4.099840789017606e-06, "loss": 2.693, "step": 10694 }, { "epoch": 0.8769459390212657, "grad_norm": 0.3349092900753021, "learning_rate": 4.089095221910971e-06, "loss": 2.6796, "step": 10696 }, { "epoch": 0.8771099154496541, "grad_norm": 0.31555473804473877, "learning_rate": 4.078363154797954e-06, "loss": 2.6619, "step": 10698 }, { "epoch": 0.8772738918780425, "grad_norm": 0.3380037546157837, "learning_rate": 4.06764459083433e-06, "loss": 2.7203, "step": 10700 }, { "epoch": 0.877437868306431, "grad_norm": 0.3178458511829376, "learning_rate": 4.056939533171839e-06, "loss": 2.5835, "step": 10702 }, { "epoch": 0.8776018447348194, "grad_norm": 0.32130786776542664, "learning_rate": 4.046247984958285e-06, "loss": 2.6982, "step": 10704 }, { "epoch": 0.8777658211632078, "grad_norm": 0.33036139607429504, "learning_rate": 4.035569949337498e-06, "loss": 2.6601, "step": 10706 }, { "epoch": 0.8779297975915962, "grad_norm": 0.32956549525260925, "learning_rate": 4.024905429449339e-06, "loss": 2.6544, "step": 10708 }, { "epoch": 0.8780937740199847, "grad_norm": 0.3229324519634247, "learning_rate": 4.014254428429659e-06, "loss": 2.6357, "step": 10710 }, { "epoch": 0.8782577504483731, "grad_norm": 0.3299851715564728, "learning_rate": 4.003616949410377e-06, "loss": 2.6415, "step": 10712 }, { "epoch": 0.8784217268767615, "grad_norm": 0.3361757695674896, "learning_rate": 3.992992995519423e-06, "loss": 2.6448, "step": 10714 }, { "epoch": 0.8785857033051498, "grad_norm": 0.3305593430995941, "learning_rate": 3.982382569880722e-06, "loss": 2.6518, "step": 10716 }, { "epoch": 0.8787496797335383, "grad_norm": 0.34519267082214355, "learning_rate": 3.9717856756142815e-06, "loss": 2.6518, "step": 10718 }, { "epoch": 0.8789136561619267, "grad_norm": 0.3227837383747101, "learning_rate": 3.96120231583606e-06, "loss": 2.6938, "step": 10720 }, { "epoch": 0.8790776325903151, "grad_norm": 0.3229362964630127, "learning_rate": 3.950632493658085e-06, "loss": 2.6862, "step": 10722 }, { "epoch": 0.8792416090187035, "grad_norm": 0.3313765823841095, "learning_rate": 3.940076212188381e-06, "loss": 2.6807, "step": 10724 }, { "epoch": 0.879405585447092, "grad_norm": 0.33440741896629333, "learning_rate": 3.929533474531016e-06, "loss": 2.6862, "step": 10726 }, { "epoch": 0.8795695618754804, "grad_norm": 0.31740376353263855, "learning_rate": 3.919004283786032e-06, "loss": 2.6011, "step": 10728 }, { "epoch": 0.8797335383038688, "grad_norm": 0.3292343020439148, "learning_rate": 3.908488643049524e-06, "loss": 2.6685, "step": 10730 }, { "epoch": 0.8798975147322572, "grad_norm": 0.3406120538711548, "learning_rate": 3.897986555413591e-06, "loss": 2.6928, "step": 10732 }, { "epoch": 0.8800614911606457, "grad_norm": 0.32527080178260803, "learning_rate": 3.887498023966346e-06, "loss": 2.6828, "step": 10734 }, { "epoch": 0.8802254675890341, "grad_norm": 0.33537909388542175, "learning_rate": 3.8770230517919286e-06, "loss": 2.7461, "step": 10736 }, { "epoch": 0.8803894440174225, "grad_norm": 0.32823824882507324, "learning_rate": 3.866561641970462e-06, "loss": 2.6487, "step": 10738 }, { "epoch": 0.8805534204458109, "grad_norm": 0.3205508291721344, "learning_rate": 3.8561137975781095e-06, "loss": 2.6377, "step": 10740 }, { "epoch": 0.8807173968741994, "grad_norm": 0.331306517124176, "learning_rate": 3.845679521687029e-06, "loss": 2.6753, "step": 10742 }, { "epoch": 0.8808813733025878, "grad_norm": 0.3367360532283783, "learning_rate": 3.835258817365406e-06, "loss": 2.718, "step": 10744 }, { "epoch": 0.8810453497309761, "grad_norm": 0.3306530714035034, "learning_rate": 3.8248516876774055e-06, "loss": 2.6874, "step": 10746 }, { "epoch": 0.8812093261593645, "grad_norm": 0.3342745304107666, "learning_rate": 3.814458135683241e-06, "loss": 2.6348, "step": 10748 }, { "epoch": 0.881373302587753, "grad_norm": 0.3220575451850891, "learning_rate": 3.8040781644390957e-06, "loss": 2.6722, "step": 10750 }, { "epoch": 0.8815372790161414, "grad_norm": 0.32037273049354553, "learning_rate": 3.7937117769971832e-06, "loss": 2.6553, "step": 10752 }, { "epoch": 0.8817012554445298, "grad_norm": 0.33724814653396606, "learning_rate": 3.783358976405721e-06, "loss": 2.6891, "step": 10754 }, { "epoch": 0.8818652318729183, "grad_norm": 0.32085883617401123, "learning_rate": 3.773019765708913e-06, "loss": 2.6065, "step": 10756 }, { "epoch": 0.8820292083013067, "grad_norm": 0.3302493989467621, "learning_rate": 3.7626941479469825e-06, "loss": 2.7057, "step": 10758 }, { "epoch": 0.8821931847296951, "grad_norm": 0.322139173746109, "learning_rate": 3.7523821261561565e-06, "loss": 2.6563, "step": 10760 }, { "epoch": 0.8823571611580835, "grad_norm": 0.31395354866981506, "learning_rate": 3.742083703368665e-06, "loss": 2.6835, "step": 10762 }, { "epoch": 0.882521137586472, "grad_norm": 0.3188331425189972, "learning_rate": 3.7317988826127195e-06, "loss": 2.6302, "step": 10764 }, { "epoch": 0.8826851140148604, "grad_norm": 0.3252706825733185, "learning_rate": 3.721527666912572e-06, "loss": 2.6172, "step": 10766 }, { "epoch": 0.8828490904432488, "grad_norm": 0.32820388674736023, "learning_rate": 3.71127005928843e-06, "loss": 2.6889, "step": 10768 }, { "epoch": 0.8830130668716372, "grad_norm": 0.32060661911964417, "learning_rate": 3.701026062756524e-06, "loss": 2.6493, "step": 10770 }, { "epoch": 0.8831770433000257, "grad_norm": 0.33316943049430847, "learning_rate": 3.6907956803290735e-06, "loss": 2.6655, "step": 10772 }, { "epoch": 0.8833410197284141, "grad_norm": 0.3209678530693054, "learning_rate": 3.6805789150143155e-06, "loss": 2.6705, "step": 10774 }, { "epoch": 0.8835049961568024, "grad_norm": 0.3332614004611969, "learning_rate": 3.670375769816442e-06, "loss": 2.6661, "step": 10776 }, { "epoch": 0.8836689725851908, "grad_norm": 0.3414953351020813, "learning_rate": 3.660186247735675e-06, "loss": 2.655, "step": 10778 }, { "epoch": 0.8838329490135793, "grad_norm": 0.3352857828140259, "learning_rate": 3.650010351768224e-06, "loss": 2.6154, "step": 10780 }, { "epoch": 0.8839969254419677, "grad_norm": 0.32666221261024475, "learning_rate": 3.639848084906272e-06, "loss": 2.6449, "step": 10782 }, { "epoch": 0.8841609018703561, "grad_norm": 0.32529404759407043, "learning_rate": 3.6296994501380355e-06, "loss": 2.6205, "step": 10784 }, { "epoch": 0.8843248782987445, "grad_norm": 0.31446075439453125, "learning_rate": 3.619564450447671e-06, "loss": 2.6182, "step": 10786 }, { "epoch": 0.884488854727133, "grad_norm": 0.3259067237377167, "learning_rate": 3.6094430888153617e-06, "loss": 2.6604, "step": 10788 }, { "epoch": 0.8846528311555214, "grad_norm": 0.32716208696365356, "learning_rate": 3.5993353682172715e-06, "loss": 2.6491, "step": 10790 }, { "epoch": 0.8848168075839098, "grad_norm": 0.3239395022392273, "learning_rate": 3.5892412916255557e-06, "loss": 2.6885, "step": 10792 }, { "epoch": 0.8849807840122982, "grad_norm": 0.327812522649765, "learning_rate": 3.579160862008335e-06, "loss": 2.7365, "step": 10794 }, { "epoch": 0.8851447604406867, "grad_norm": 0.32002153992652893, "learning_rate": 3.5690940823297658e-06, "loss": 2.6538, "step": 10796 }, { "epoch": 0.8853087368690751, "grad_norm": 0.3370496928691864, "learning_rate": 3.559040955549936e-06, "loss": 2.6649, "step": 10798 }, { "epoch": 0.8854727132974635, "grad_norm": 0.3300323784351349, "learning_rate": 3.549001484624953e-06, "loss": 2.6204, "step": 10800 }, { "epoch": 0.8856366897258519, "grad_norm": 0.322567343711853, "learning_rate": 3.5389756725069113e-06, "loss": 2.5659, "step": 10802 }, { "epoch": 0.8858006661542404, "grad_norm": 0.3354808986186981, "learning_rate": 3.528963522143852e-06, "loss": 2.6113, "step": 10804 }, { "epoch": 0.8859646425826287, "grad_norm": 0.3249177634716034, "learning_rate": 3.5189650364798432e-06, "loss": 2.6471, "step": 10806 }, { "epoch": 0.8861286190110171, "grad_norm": 0.3191516399383545, "learning_rate": 3.5089802184549048e-06, "loss": 2.6694, "step": 10808 }, { "epoch": 0.8862925954394056, "grad_norm": 0.33032816648483276, "learning_rate": 3.499009071005066e-06, "loss": 2.6166, "step": 10810 }, { "epoch": 0.886456571867794, "grad_norm": 0.3214799463748932, "learning_rate": 3.4890515970622928e-06, "loss": 2.6632, "step": 10812 }, { "epoch": 0.8866205482961824, "grad_norm": 0.32267218828201294, "learning_rate": 3.4791077995545875e-06, "loss": 2.6901, "step": 10814 }, { "epoch": 0.8867845247245708, "grad_norm": 0.3270774185657501, "learning_rate": 3.4691776814058774e-06, "loss": 2.6289, "step": 10816 }, { "epoch": 0.8869485011529593, "grad_norm": 0.3144334852695465, "learning_rate": 3.4592612455360994e-06, "loss": 2.6261, "step": 10818 }, { "epoch": 0.8871124775813477, "grad_norm": 0.322626531124115, "learning_rate": 3.44935849486116e-06, "loss": 2.6945, "step": 10820 }, { "epoch": 0.8872764540097361, "grad_norm": 0.3266322612762451, "learning_rate": 3.439469432292941e-06, "loss": 2.6494, "step": 10822 }, { "epoch": 0.8874404304381245, "grad_norm": 0.32379788160324097, "learning_rate": 3.4295940607392885e-06, "loss": 2.595, "step": 10824 }, { "epoch": 0.887604406866513, "grad_norm": 0.3194182515144348, "learning_rate": 3.419732383104035e-06, "loss": 2.6601, "step": 10826 }, { "epoch": 0.8877683832949014, "grad_norm": 0.31350353360176086, "learning_rate": 3.409884402286995e-06, "loss": 2.6611, "step": 10828 }, { "epoch": 0.8879323597232898, "grad_norm": 0.31211867928504944, "learning_rate": 3.4000501211839176e-06, "loss": 2.6752, "step": 10830 }, { "epoch": 0.8880963361516782, "grad_norm": 0.31430208683013916, "learning_rate": 3.3902295426865905e-06, "loss": 2.6434, "step": 10832 }, { "epoch": 0.8882603125800667, "grad_norm": 0.3272738754749298, "learning_rate": 3.3804226696826978e-06, "loss": 2.6326, "step": 10834 }, { "epoch": 0.888424289008455, "grad_norm": 0.3435550332069397, "learning_rate": 3.370629505055933e-06, "loss": 2.6167, "step": 10836 }, { "epoch": 0.8885882654368434, "grad_norm": 0.33443543314933777, "learning_rate": 3.3608500516859655e-06, "loss": 2.6692, "step": 10838 }, { "epoch": 0.8887522418652318, "grad_norm": 0.3236541450023651, "learning_rate": 3.3510843124484216e-06, "loss": 2.6329, "step": 10840 }, { "epoch": 0.8889162182936203, "grad_norm": 0.3231198787689209, "learning_rate": 3.341332290214877e-06, "loss": 2.6365, "step": 10842 }, { "epoch": 0.8890801947220087, "grad_norm": 0.3229336142539978, "learning_rate": 3.3315939878529045e-06, "loss": 2.6209, "step": 10844 }, { "epoch": 0.8892441711503971, "grad_norm": 0.33615022897720337, "learning_rate": 3.321869408226019e-06, "loss": 2.6548, "step": 10846 }, { "epoch": 0.8894081475787855, "grad_norm": 0.3172944188117981, "learning_rate": 3.3121585541937173e-06, "loss": 2.6681, "step": 10848 }, { "epoch": 0.889572124007174, "grad_norm": 0.3268885910511017, "learning_rate": 3.302461428611464e-06, "loss": 2.6629, "step": 10850 }, { "epoch": 0.8897361004355624, "grad_norm": 0.3293801546096802, "learning_rate": 3.2927780343306514e-06, "loss": 2.693, "step": 10852 }, { "epoch": 0.8899000768639508, "grad_norm": 0.32890743017196655, "learning_rate": 3.2831083741986733e-06, "loss": 2.6578, "step": 10854 }, { "epoch": 0.8900640532923392, "grad_norm": 0.3258358836174011, "learning_rate": 3.273452451058867e-06, "loss": 2.6458, "step": 10856 }, { "epoch": 0.8902280297207277, "grad_norm": 0.32360926270484924, "learning_rate": 3.2638102677505445e-06, "loss": 2.6956, "step": 10858 }, { "epoch": 0.8903920061491161, "grad_norm": 0.32284998893737793, "learning_rate": 3.254181827108943e-06, "loss": 2.6506, "step": 10860 }, { "epoch": 0.8905559825775045, "grad_norm": 0.31723472476005554, "learning_rate": 3.244567131965315e-06, "loss": 2.6323, "step": 10862 }, { "epoch": 0.890719959005893, "grad_norm": 0.32693344354629517, "learning_rate": 3.234966185146815e-06, "loss": 2.6928, "step": 10864 }, { "epoch": 0.8908839354342813, "grad_norm": 0.3123942017555237, "learning_rate": 3.225378989476585e-06, "loss": 2.6351, "step": 10866 }, { "epoch": 0.8910479118626697, "grad_norm": 0.32146239280700684, "learning_rate": 3.2158055477737257e-06, "loss": 2.6877, "step": 10868 }, { "epoch": 0.8912118882910581, "grad_norm": 0.32358887791633606, "learning_rate": 3.2062458628532743e-06, "loss": 2.6527, "step": 10870 }, { "epoch": 0.8913758647194466, "grad_norm": 0.33025139570236206, "learning_rate": 3.196699937526232e-06, "loss": 2.6154, "step": 10872 }, { "epoch": 0.891539841147835, "grad_norm": 0.3247290849685669, "learning_rate": 3.187167774599564e-06, "loss": 2.6882, "step": 10874 }, { "epoch": 0.8917038175762234, "grad_norm": 0.33716461062431335, "learning_rate": 3.1776493768761796e-06, "loss": 2.6223, "step": 10876 }, { "epoch": 0.8918677940046118, "grad_norm": 0.32597222924232483, "learning_rate": 3.1681447471549274e-06, "loss": 2.7004, "step": 10878 }, { "epoch": 0.8920317704330003, "grad_norm": 0.31761476397514343, "learning_rate": 3.1586538882306504e-06, "loss": 2.6357, "step": 10880 }, { "epoch": 0.8921957468613887, "grad_norm": 0.32472214102745056, "learning_rate": 3.1491768028940884e-06, "loss": 2.6468, "step": 10882 }, { "epoch": 0.8923597232897771, "grad_norm": 0.3508470952510834, "learning_rate": 3.139713493931962e-06, "loss": 2.665, "step": 10884 }, { "epoch": 0.8925236997181655, "grad_norm": 0.3303401470184326, "learning_rate": 3.130263964126934e-06, "loss": 2.6655, "step": 10886 }, { "epoch": 0.892687676146554, "grad_norm": 0.3118439316749573, "learning_rate": 3.120828216257632e-06, "loss": 2.6721, "step": 10888 }, { "epoch": 0.8928516525749424, "grad_norm": 0.31891563534736633, "learning_rate": 3.111406253098592e-06, "loss": 2.6562, "step": 10890 }, { "epoch": 0.8930156290033308, "grad_norm": 0.32649263739585876, "learning_rate": 3.101998077420332e-06, "loss": 2.6433, "step": 10892 }, { "epoch": 0.8931796054317191, "grad_norm": 0.3189006745815277, "learning_rate": 3.0926036919893097e-06, "loss": 2.679, "step": 10894 }, { "epoch": 0.8933435818601076, "grad_norm": 0.33281025290489197, "learning_rate": 3.0832230995679e-06, "loss": 2.6921, "step": 10896 }, { "epoch": 0.893507558288496, "grad_norm": 0.3271988332271576, "learning_rate": 3.073856302914474e-06, "loss": 2.6322, "step": 10898 }, { "epoch": 0.8936715347168844, "grad_norm": 0.3197652995586395, "learning_rate": 3.0645033047832892e-06, "loss": 2.6741, "step": 10900 }, { "epoch": 0.8938355111452728, "grad_norm": 0.32472512125968933, "learning_rate": 3.0551641079245906e-06, "loss": 2.6677, "step": 10902 }, { "epoch": 0.8939994875736613, "grad_norm": 0.32138141989707947, "learning_rate": 3.0458387150845424e-06, "loss": 2.7149, "step": 10904 }, { "epoch": 0.8941634640020497, "grad_norm": 0.3243404030799866, "learning_rate": 3.036527129005257e-06, "loss": 2.6496, "step": 10906 }, { "epoch": 0.8943274404304381, "grad_norm": 1.194546103477478, "learning_rate": 3.027229352424765e-06, "loss": 2.6607, "step": 10908 }, { "epoch": 0.8944914168588265, "grad_norm": 0.32675227522850037, "learning_rate": 3.017945388077087e-06, "loss": 2.6827, "step": 10910 }, { "epoch": 0.894655393287215, "grad_norm": 0.33102577924728394, "learning_rate": 3.0086752386921325e-06, "loss": 2.6599, "step": 10912 }, { "epoch": 0.8948193697156034, "grad_norm": 0.32758015394210815, "learning_rate": 2.999418906995766e-06, "loss": 2.6225, "step": 10914 }, { "epoch": 0.8949833461439918, "grad_norm": 0.3146892488002777, "learning_rate": 2.990176395709804e-06, "loss": 2.6125, "step": 10916 }, { "epoch": 0.8951473225723802, "grad_norm": 0.3234754800796509, "learning_rate": 2.980947707551962e-06, "loss": 2.6689, "step": 10918 }, { "epoch": 0.8953112990007687, "grad_norm": 0.313838928937912, "learning_rate": 2.9717328452359307e-06, "loss": 2.6054, "step": 10920 }, { "epoch": 0.8954752754291571, "grad_norm": 0.32194143533706665, "learning_rate": 2.9625318114713195e-06, "loss": 2.6448, "step": 10922 }, { "epoch": 0.8956392518575454, "grad_norm": 0.3123278021812439, "learning_rate": 2.953344608963671e-06, "loss": 2.7136, "step": 10924 }, { "epoch": 0.895803228285934, "grad_norm": 0.3286539912223816, "learning_rate": 2.9441712404144396e-06, "loss": 2.6744, "step": 10926 }, { "epoch": 0.8959672047143223, "grad_norm": 0.34007197618484497, "learning_rate": 2.935011708521063e-06, "loss": 2.6902, "step": 10928 }, { "epoch": 0.8961311811427107, "grad_norm": 0.32450494170188904, "learning_rate": 2.9258660159768635e-06, "loss": 2.7009, "step": 10930 }, { "epoch": 0.8962951575710991, "grad_norm": 0.32629474997520447, "learning_rate": 2.9167341654711135e-06, "loss": 2.6378, "step": 10932 }, { "epoch": 0.8964591339994876, "grad_norm": 0.3427327275276184, "learning_rate": 2.9076161596890086e-06, "loss": 2.6958, "step": 10934 }, { "epoch": 0.896623110427876, "grad_norm": 0.32507121562957764, "learning_rate": 2.8985120013116875e-06, "loss": 2.669, "step": 10936 }, { "epoch": 0.8967870868562644, "grad_norm": 0.3211694359779358, "learning_rate": 2.8894216930161924e-06, "loss": 2.6765, "step": 10938 }, { "epoch": 0.8969510632846528, "grad_norm": 0.3326278626918793, "learning_rate": 2.880345237475518e-06, "loss": 2.6824, "step": 10940 }, { "epoch": 0.8971150397130413, "grad_norm": 0.33012109994888306, "learning_rate": 2.8712826373585744e-06, "loss": 2.6241, "step": 10942 }, { "epoch": 0.8972790161414297, "grad_norm": 0.312504380941391, "learning_rate": 2.86223389533018e-06, "loss": 2.6739, "step": 10944 }, { "epoch": 0.8974429925698181, "grad_norm": 0.3131183087825775, "learning_rate": 2.8531990140511223e-06, "loss": 2.6585, "step": 10946 }, { "epoch": 0.8976069689982065, "grad_norm": 0.32728302478790283, "learning_rate": 2.8441779961780713e-06, "loss": 2.7079, "step": 10948 }, { "epoch": 0.897770945426595, "grad_norm": 0.31666114926338196, "learning_rate": 2.8351708443636436e-06, "loss": 2.6613, "step": 10950 }, { "epoch": 0.8979349218549834, "grad_norm": 0.3198012113571167, "learning_rate": 2.82617756125636e-06, "loss": 2.6368, "step": 10952 }, { "epoch": 0.8980988982833717, "grad_norm": 0.3156406283378601, "learning_rate": 2.8171981495006937e-06, "loss": 2.6655, "step": 10954 }, { "epoch": 0.8982628747117601, "grad_norm": 0.3288864195346832, "learning_rate": 2.8082326117369884e-06, "loss": 2.6497, "step": 10956 }, { "epoch": 0.8984268511401486, "grad_norm": 0.3348493278026581, "learning_rate": 2.7992809506015795e-06, "loss": 2.6983, "step": 10958 }, { "epoch": 0.898590827568537, "grad_norm": 0.31900572776794434, "learning_rate": 2.7903431687266447e-06, "loss": 2.6816, "step": 10960 }, { "epoch": 0.8987548039969254, "grad_norm": 0.3322133421897888, "learning_rate": 2.781419268740343e-06, "loss": 2.6748, "step": 10962 }, { "epoch": 0.8989187804253138, "grad_norm": 0.3232822120189667, "learning_rate": 2.77250925326672e-06, "loss": 2.6834, "step": 10964 }, { "epoch": 0.8990827568537023, "grad_norm": 0.3240607678890228, "learning_rate": 2.7636131249257344e-06, "loss": 2.6548, "step": 10966 }, { "epoch": 0.8992467332820907, "grad_norm": 0.3249566853046417, "learning_rate": 2.7547308863332788e-06, "loss": 2.7173, "step": 10968 }, { "epoch": 0.8994107097104791, "grad_norm": 0.33513227105140686, "learning_rate": 2.7458625401011572e-06, "loss": 2.6664, "step": 10970 }, { "epoch": 0.8995746861388675, "grad_norm": 0.32000359892845154, "learning_rate": 2.7370080888370895e-06, "loss": 2.6475, "step": 10972 }, { "epoch": 0.899738662567256, "grad_norm": 0.3284592926502228, "learning_rate": 2.7281675351446877e-06, "loss": 2.6479, "step": 10974 }, { "epoch": 0.8999026389956444, "grad_norm": 0.31511369347572327, "learning_rate": 2.719340881623522e-06, "loss": 2.6777, "step": 10976 }, { "epoch": 0.9000666154240328, "grad_norm": 0.3201831877231598, "learning_rate": 2.7105281308690278e-06, "loss": 2.6546, "step": 10978 }, { "epoch": 0.9002305918524213, "grad_norm": 0.31835728883743286, "learning_rate": 2.701729285472582e-06, "loss": 2.6561, "step": 10980 }, { "epoch": 0.9003945682808097, "grad_norm": 0.3243107199668884, "learning_rate": 2.6929443480214643e-06, "loss": 2.6691, "step": 10982 }, { "epoch": 0.900558544709198, "grad_norm": 0.3279881179332733, "learning_rate": 2.684173321098876e-06, "loss": 2.6651, "step": 10984 }, { "epoch": 0.9007225211375864, "grad_norm": 0.31579625606536865, "learning_rate": 2.6754162072838927e-06, "loss": 2.6857, "step": 10986 }, { "epoch": 0.9008864975659749, "grad_norm": 0.3239685297012329, "learning_rate": 2.666673009151538e-06, "loss": 2.6703, "step": 10988 }, { "epoch": 0.9010504739943633, "grad_norm": 0.31615307927131653, "learning_rate": 2.6579437292727384e-06, "loss": 2.6413, "step": 10990 }, { "epoch": 0.9012144504227517, "grad_norm": 0.3143499493598938, "learning_rate": 2.6492283702142906e-06, "loss": 2.6372, "step": 10992 }, { "epoch": 0.9013784268511401, "grad_norm": 0.32308295369148254, "learning_rate": 2.6405269345389506e-06, "loss": 2.6435, "step": 10994 }, { "epoch": 0.9015424032795286, "grad_norm": 0.32196369767189026, "learning_rate": 2.6318394248053435e-06, "loss": 2.6412, "step": 10996 }, { "epoch": 0.901706379707917, "grad_norm": 0.32773125171661377, "learning_rate": 2.6231658435680152e-06, "loss": 2.6525, "step": 10998 }, { "epoch": 0.9018703561363054, "grad_norm": 0.3210839331150055, "learning_rate": 2.6145061933774083e-06, "loss": 2.7325, "step": 11000 }, { "epoch": 0.9020343325646938, "grad_norm": 0.3330003321170807, "learning_rate": 2.605860476779881e-06, "loss": 2.6782, "step": 11002 }, { "epoch": 0.9021983089930823, "grad_norm": 0.32509610056877136, "learning_rate": 2.597228696317672e-06, "loss": 2.5923, "step": 11004 }, { "epoch": 0.9023622854214707, "grad_norm": 0.33983367681503296, "learning_rate": 2.58861085452895e-06, "loss": 2.6791, "step": 11006 }, { "epoch": 0.9025262618498591, "grad_norm": 0.3296528458595276, "learning_rate": 2.5800069539477557e-06, "loss": 2.6375, "step": 11008 }, { "epoch": 0.9026902382782475, "grad_norm": 0.33064815402030945, "learning_rate": 2.571416997104059e-06, "loss": 2.6663, "step": 11010 }, { "epoch": 0.902854214706636, "grad_norm": 0.3220709562301636, "learning_rate": 2.5628409865237235e-06, "loss": 2.6839, "step": 11012 }, { "epoch": 0.9030181911350244, "grad_norm": 0.30855557322502136, "learning_rate": 2.5542789247284814e-06, "loss": 2.6324, "step": 11014 }, { "epoch": 0.9031821675634127, "grad_norm": 0.3161405622959137, "learning_rate": 2.5457308142360027e-06, "loss": 2.6351, "step": 11016 }, { "epoch": 0.9033461439918011, "grad_norm": 0.3267020881175995, "learning_rate": 2.5371966575598317e-06, "loss": 2.6908, "step": 11018 }, { "epoch": 0.9035101204201896, "grad_norm": 0.3190624713897705, "learning_rate": 2.5286764572094336e-06, "loss": 2.6568, "step": 11020 }, { "epoch": 0.903674096848578, "grad_norm": 0.327904611825943, "learning_rate": 2.5201702156901206e-06, "loss": 2.6689, "step": 11022 }, { "epoch": 0.9038380732769664, "grad_norm": 0.3408558964729309, "learning_rate": 2.5116779355031707e-06, "loss": 2.6583, "step": 11024 }, { "epoch": 0.9040020497053548, "grad_norm": 0.3213607668876648, "learning_rate": 2.5031996191456965e-06, "loss": 2.6015, "step": 11026 }, { "epoch": 0.9041660261337433, "grad_norm": 0.32054775953292847, "learning_rate": 2.4947352691107317e-06, "loss": 2.6801, "step": 11028 }, { "epoch": 0.9043300025621317, "grad_norm": 0.318186491727829, "learning_rate": 2.4862848878872024e-06, "loss": 2.6599, "step": 11030 }, { "epoch": 0.9044939789905201, "grad_norm": 0.32675546407699585, "learning_rate": 2.4778484779599155e-06, "loss": 2.6926, "step": 11032 }, { "epoch": 0.9046579554189085, "grad_norm": 0.3117828965187073, "learning_rate": 2.4694260418095805e-06, "loss": 2.6064, "step": 11034 }, { "epoch": 0.904821931847297, "grad_norm": 0.33182188868522644, "learning_rate": 2.4610175819128e-06, "loss": 2.5932, "step": 11036 }, { "epoch": 0.9049859082756854, "grad_norm": 0.31492504477500916, "learning_rate": 2.452623100742063e-06, "loss": 2.6289, "step": 11038 }, { "epoch": 0.9051498847040738, "grad_norm": 0.32689952850341797, "learning_rate": 2.444242600765728e-06, "loss": 2.6412, "step": 11040 }, { "epoch": 0.9053138611324623, "grad_norm": 0.3316658139228821, "learning_rate": 2.4358760844480854e-06, "loss": 2.6976, "step": 11042 }, { "epoch": 0.9054778375608507, "grad_norm": 0.31415751576423645, "learning_rate": 2.427523554249278e-06, "loss": 2.6658, "step": 11044 }, { "epoch": 0.905641813989239, "grad_norm": 0.3253602981567383, "learning_rate": 2.419185012625347e-06, "loss": 2.6999, "step": 11046 }, { "epoch": 0.9058057904176274, "grad_norm": 0.33021947741508484, "learning_rate": 2.410860462028225e-06, "loss": 2.6622, "step": 11048 }, { "epoch": 0.9059697668460159, "grad_norm": 0.32295936346054077, "learning_rate": 2.402549904905732e-06, "loss": 2.6025, "step": 11050 }, { "epoch": 0.9061337432744043, "grad_norm": 0.3200579881668091, "learning_rate": 2.3942533437015524e-06, "loss": 2.6571, "step": 11052 }, { "epoch": 0.9062977197027927, "grad_norm": 0.32763394713401794, "learning_rate": 2.385970780855279e-06, "loss": 2.7029, "step": 11054 }, { "epoch": 0.9064616961311811, "grad_norm": 0.3201748728752136, "learning_rate": 2.3777022188023913e-06, "loss": 2.6803, "step": 11056 }, { "epoch": 0.9066256725595696, "grad_norm": 0.3198850750923157, "learning_rate": 2.369447659974211e-06, "loss": 2.6755, "step": 11058 }, { "epoch": 0.906789648987958, "grad_norm": 0.31570103764533997, "learning_rate": 2.3612071067980025e-06, "loss": 2.6288, "step": 11060 }, { "epoch": 0.9069536254163464, "grad_norm": 0.3176112473011017, "learning_rate": 2.3529805616968602e-06, "loss": 2.6808, "step": 11062 }, { "epoch": 0.9071176018447348, "grad_norm": 0.3328532576560974, "learning_rate": 2.3447680270897886e-06, "loss": 2.658, "step": 11064 }, { "epoch": 0.9072815782731233, "grad_norm": 0.32274332642555237, "learning_rate": 2.3365695053916668e-06, "loss": 2.6513, "step": 11066 }, { "epoch": 0.9074455547015117, "grad_norm": 0.30922210216522217, "learning_rate": 2.3283849990132556e-06, "loss": 2.5595, "step": 11068 }, { "epoch": 0.9076095311299001, "grad_norm": 0.31638219952583313, "learning_rate": 2.3202145103611682e-06, "loss": 2.6801, "step": 11070 }, { "epoch": 0.9077735075582885, "grad_norm": 0.31160736083984375, "learning_rate": 2.3120580418379443e-06, "loss": 2.6082, "step": 11072 }, { "epoch": 0.907937483986677, "grad_norm": 0.33331605792045593, "learning_rate": 2.3039155958419546e-06, "loss": 2.6803, "step": 11074 }, { "epoch": 0.9081014604150653, "grad_norm": 0.3144294321537018, "learning_rate": 2.295787174767472e-06, "loss": 2.5728, "step": 11076 }, { "epoch": 0.9082654368434537, "grad_norm": 0.3164895474910736, "learning_rate": 2.2876727810046515e-06, "loss": 2.6728, "step": 11078 }, { "epoch": 0.9084294132718421, "grad_norm": 0.3208268880844116, "learning_rate": 2.2795724169394905e-06, "loss": 2.72, "step": 11080 }, { "epoch": 0.9085933897002306, "grad_norm": 0.31076040863990784, "learning_rate": 2.2714860849538943e-06, "loss": 2.6982, "step": 11082 }, { "epoch": 0.908757366128619, "grad_norm": 0.32251042127609253, "learning_rate": 2.2634137874256276e-06, "loss": 2.7064, "step": 11084 }, { "epoch": 0.9089213425570074, "grad_norm": 0.3169712722301483, "learning_rate": 2.2553555267283423e-06, "loss": 2.6554, "step": 11086 }, { "epoch": 0.9090853189853958, "grad_norm": 0.3183402121067047, "learning_rate": 2.24731130523152e-06, "loss": 2.6737, "step": 11088 }, { "epoch": 0.9092492954137843, "grad_norm": 0.3115294277667999, "learning_rate": 2.2392811253005797e-06, "loss": 2.6545, "step": 11090 }, { "epoch": 0.9094132718421727, "grad_norm": 0.31985417008399963, "learning_rate": 2.231264989296755e-06, "loss": 2.7366, "step": 11092 }, { "epoch": 0.9095772482705611, "grad_norm": 0.34194216132164, "learning_rate": 2.2232628995771777e-06, "loss": 2.618, "step": 11094 }, { "epoch": 0.9097412246989496, "grad_norm": 0.32142728567123413, "learning_rate": 2.215274858494848e-06, "loss": 2.6948, "step": 11096 }, { "epoch": 0.909905201127338, "grad_norm": 0.3126019537448883, "learning_rate": 2.2073008683986317e-06, "loss": 2.656, "step": 11098 }, { "epoch": 0.9100691775557264, "grad_norm": 0.32221144437789917, "learning_rate": 2.1993409316332535e-06, "loss": 2.6476, "step": 11100 }, { "epoch": 0.9102331539841148, "grad_norm": 0.32423529028892517, "learning_rate": 2.1913950505393134e-06, "loss": 2.7393, "step": 11102 }, { "epoch": 0.9103971304125033, "grad_norm": 0.32624271512031555, "learning_rate": 2.183463227453292e-06, "loss": 2.6421, "step": 11104 }, { "epoch": 0.9105611068408916, "grad_norm": 0.32407236099243164, "learning_rate": 2.175545464707501e-06, "loss": 2.6296, "step": 11106 }, { "epoch": 0.91072508326928, "grad_norm": 0.3189644515514374, "learning_rate": 2.1676417646301685e-06, "loss": 2.6533, "step": 11108 }, { "epoch": 0.9108890596976684, "grad_norm": 0.317101389169693, "learning_rate": 2.1597521295453392e-06, "loss": 2.6429, "step": 11110 }, { "epoch": 0.9110530361260569, "grad_norm": 0.3202165365219116, "learning_rate": 2.151876561772942e-06, "loss": 2.656, "step": 11112 }, { "epoch": 0.9112170125544453, "grad_norm": 0.3179476261138916, "learning_rate": 2.144015063628779e-06, "loss": 2.6331, "step": 11114 }, { "epoch": 0.9113809889828337, "grad_norm": 0.3203252851963043, "learning_rate": 2.136167637424502e-06, "loss": 2.6584, "step": 11116 }, { "epoch": 0.9115449654112221, "grad_norm": 0.32069724798202515, "learning_rate": 2.1283342854676204e-06, "loss": 2.625, "step": 11118 }, { "epoch": 0.9117089418396106, "grad_norm": 0.318382203578949, "learning_rate": 2.1205150100615245e-06, "loss": 2.6468, "step": 11120 }, { "epoch": 0.911872918267999, "grad_norm": 0.31401869654655457, "learning_rate": 2.1127098135054424e-06, "loss": 2.617, "step": 11122 }, { "epoch": 0.9120368946963874, "grad_norm": 0.324240505695343, "learning_rate": 2.1049186980944825e-06, "loss": 2.6537, "step": 11124 }, { "epoch": 0.9122008711247758, "grad_norm": 0.30565333366394043, "learning_rate": 2.0971416661196065e-06, "loss": 2.655, "step": 11126 }, { "epoch": 0.9123648475531643, "grad_norm": 0.3273899257183075, "learning_rate": 2.0893787198676187e-06, "loss": 2.6504, "step": 11128 }, { "epoch": 0.9125288239815527, "grad_norm": 0.32581984996795654, "learning_rate": 2.0816298616212037e-06, "loss": 2.6918, "step": 11130 }, { "epoch": 0.912692800409941, "grad_norm": 0.3232536017894745, "learning_rate": 2.073895093658895e-06, "loss": 2.6523, "step": 11132 }, { "epoch": 0.9128567768383294, "grad_norm": 0.32395365834236145, "learning_rate": 2.0661744182550903e-06, "loss": 2.648, "step": 11134 }, { "epoch": 0.9130207532667179, "grad_norm": 0.3082559108734131, "learning_rate": 2.058467837680017e-06, "loss": 2.6558, "step": 11136 }, { "epoch": 0.9131847296951063, "grad_norm": 0.32950127124786377, "learning_rate": 2.050775354199802e-06, "loss": 2.7411, "step": 11138 }, { "epoch": 0.9133487061234947, "grad_norm": 0.3364517390727997, "learning_rate": 2.04309697007638e-06, "loss": 2.6728, "step": 11140 }, { "epoch": 0.9135126825518831, "grad_norm": 0.3355138599872589, "learning_rate": 2.0354326875675677e-06, "loss": 2.6528, "step": 11142 }, { "epoch": 0.9136766589802716, "grad_norm": 0.32453155517578125, "learning_rate": 2.02778250892704e-06, "loss": 2.6969, "step": 11144 }, { "epoch": 0.91384063540866, "grad_norm": 0.3184633255004883, "learning_rate": 2.020146436404302e-06, "loss": 2.5946, "step": 11146 }, { "epoch": 0.9140046118370484, "grad_norm": 0.32798850536346436, "learning_rate": 2.0125244722447257e-06, "loss": 2.6631, "step": 11148 }, { "epoch": 0.9141685882654369, "grad_norm": 0.33115652203559875, "learning_rate": 2.004916618689534e-06, "loss": 2.6535, "step": 11150 }, { "epoch": 0.9143325646938253, "grad_norm": 0.32368654012680054, "learning_rate": 1.997322877975799e-06, "loss": 2.6628, "step": 11152 }, { "epoch": 0.9144965411222137, "grad_norm": 0.3099714517593384, "learning_rate": 1.989743252336429e-06, "loss": 2.64, "step": 11154 }, { "epoch": 0.9146605175506021, "grad_norm": 0.32100746035575867, "learning_rate": 1.9821777440002245e-06, "loss": 2.6708, "step": 11156 }, { "epoch": 0.9148244939789906, "grad_norm": 0.3093791604042053, "learning_rate": 1.9746263551917777e-06, "loss": 2.6831, "step": 11158 }, { "epoch": 0.914988470407379, "grad_norm": 0.31829023361206055, "learning_rate": 1.967089088131563e-06, "loss": 2.724, "step": 11160 }, { "epoch": 0.9151524468357674, "grad_norm": 0.33710429072380066, "learning_rate": 1.9595659450359018e-06, "loss": 2.6956, "step": 11162 }, { "epoch": 0.9153164232641557, "grad_norm": 0.32171329855918884, "learning_rate": 1.9520569281169633e-06, "loss": 2.6661, "step": 11164 }, { "epoch": 0.9154803996925442, "grad_norm": 0.31376808881759644, "learning_rate": 1.9445620395827426e-06, "loss": 2.6178, "step": 11166 }, { "epoch": 0.9156443761209326, "grad_norm": 0.32705920934677124, "learning_rate": 1.937081281637093e-06, "loss": 2.6687, "step": 11168 }, { "epoch": 0.915808352549321, "grad_norm": 0.31647932529449463, "learning_rate": 1.929614656479728e-06, "loss": 2.651, "step": 11170 }, { "epoch": 0.9159723289777094, "grad_norm": 0.3209375739097595, "learning_rate": 1.922162166306174e-06, "loss": 2.5947, "step": 11172 }, { "epoch": 0.9161363054060979, "grad_norm": 0.3193795084953308, "learning_rate": 1.9147238133078337e-06, "loss": 2.6364, "step": 11174 }, { "epoch": 0.9163002818344863, "grad_norm": 0.31521522998809814, "learning_rate": 1.9072995996719301e-06, "loss": 2.6691, "step": 11176 }, { "epoch": 0.9164642582628747, "grad_norm": 0.3275092542171478, "learning_rate": 1.8998895275815333e-06, "loss": 2.6654, "step": 11178 }, { "epoch": 0.9166282346912631, "grad_norm": 0.32219240069389343, "learning_rate": 1.8924935992155613e-06, "loss": 2.6388, "step": 11180 }, { "epoch": 0.9167922111196516, "grad_norm": 0.31625470519065857, "learning_rate": 1.8851118167487746e-06, "loss": 2.6762, "step": 11182 }, { "epoch": 0.91695618754804, "grad_norm": 0.32175636291503906, "learning_rate": 1.877744182351754e-06, "loss": 2.6797, "step": 11184 }, { "epoch": 0.9171201639764284, "grad_norm": 0.33392706513404846, "learning_rate": 1.8703906981909492e-06, "loss": 2.6698, "step": 11186 }, { "epoch": 0.9172841404048168, "grad_norm": 0.3232085704803467, "learning_rate": 1.8630513664286308e-06, "loss": 2.6527, "step": 11188 }, { "epoch": 0.9174481168332053, "grad_norm": 0.32504111528396606, "learning_rate": 1.8557261892229061e-06, "loss": 2.6564, "step": 11190 }, { "epoch": 0.9176120932615937, "grad_norm": 0.3210766911506653, "learning_rate": 1.8484151687277408e-06, "loss": 2.6554, "step": 11192 }, { "epoch": 0.917776069689982, "grad_norm": 0.3067858815193176, "learning_rate": 1.8411183070929094e-06, "loss": 2.6674, "step": 11194 }, { "epoch": 0.9179400461183704, "grad_norm": 0.31239691376686096, "learning_rate": 1.8338356064640406e-06, "loss": 2.6571, "step": 11196 }, { "epoch": 0.9181040225467589, "grad_norm": 0.3248264789581299, "learning_rate": 1.8265670689825931e-06, "loss": 2.6036, "step": 11198 }, { "epoch": 0.9182679989751473, "grad_norm": 0.32788315415382385, "learning_rate": 1.8193126967858742e-06, "loss": 2.6925, "step": 11200 }, { "epoch": 0.9184319754035357, "grad_norm": 0.3062846064567566, "learning_rate": 1.8120724920069943e-06, "loss": 2.6442, "step": 11202 }, { "epoch": 0.9185959518319241, "grad_norm": 0.3721127212047577, "learning_rate": 1.8048464567749445e-06, "loss": 2.6867, "step": 11204 }, { "epoch": 0.9187599282603126, "grad_norm": 0.31917721033096313, "learning_rate": 1.7976345932145034e-06, "loss": 2.6964, "step": 11206 }, { "epoch": 0.918923904688701, "grad_norm": 0.32456323504447937, "learning_rate": 1.7904369034463076e-06, "loss": 2.7141, "step": 11208 }, { "epoch": 0.9190878811170894, "grad_norm": 0.3234582543373108, "learning_rate": 1.7832533895868252e-06, "loss": 2.6188, "step": 11210 }, { "epoch": 0.9192518575454779, "grad_norm": 0.3249300718307495, "learning_rate": 1.7760840537483558e-06, "loss": 2.7013, "step": 11212 }, { "epoch": 0.9194158339738663, "grad_norm": 0.32744210958480835, "learning_rate": 1.7689288980390128e-06, "loss": 2.6416, "step": 11214 }, { "epoch": 0.9195798104022547, "grad_norm": 0.3174097239971161, "learning_rate": 1.7617879245627577e-06, "loss": 2.645, "step": 11216 }, { "epoch": 0.9197437868306431, "grad_norm": 0.31958192586898804, "learning_rate": 1.7546611354193887e-06, "loss": 2.6149, "step": 11218 }, { "epoch": 0.9199077632590316, "grad_norm": 0.3266759216785431, "learning_rate": 1.747548532704496e-06, "loss": 2.6528, "step": 11220 }, { "epoch": 0.92007173968742, "grad_norm": 0.31550687551498413, "learning_rate": 1.740450118509551e-06, "loss": 2.6438, "step": 11222 }, { "epoch": 0.9202357161158083, "grad_norm": 0.32428231835365295, "learning_rate": 1.7333658949218068e-06, "loss": 2.6175, "step": 11224 }, { "epoch": 0.9203996925441967, "grad_norm": 0.32101696729660034, "learning_rate": 1.7262958640243686e-06, "loss": 2.718, "step": 11226 }, { "epoch": 0.9205636689725852, "grad_norm": 0.31713783740997314, "learning_rate": 1.7192400278961685e-06, "loss": 2.6543, "step": 11228 }, { "epoch": 0.9207276454009736, "grad_norm": 0.3120727241039276, "learning_rate": 1.7121983886119518e-06, "loss": 2.5992, "step": 11230 }, { "epoch": 0.920891621829362, "grad_norm": 0.31299081444740295, "learning_rate": 1.7051709482422907e-06, "loss": 2.6692, "step": 11232 }, { "epoch": 0.9210555982577504, "grad_norm": 0.3186694383621216, "learning_rate": 1.6981577088536093e-06, "loss": 2.6494, "step": 11234 }, { "epoch": 0.9212195746861389, "grad_norm": 0.3170822858810425, "learning_rate": 1.6911586725081086e-06, "loss": 2.6388, "step": 11236 }, { "epoch": 0.9213835511145273, "grad_norm": 0.3165086805820465, "learning_rate": 1.6841738412638476e-06, "loss": 2.6485, "step": 11238 }, { "epoch": 0.9215475275429157, "grad_norm": 0.3357534110546112, "learning_rate": 1.677203217174711e-06, "loss": 2.6726, "step": 11240 }, { "epoch": 0.9217115039713041, "grad_norm": 0.3138125538825989, "learning_rate": 1.670246802290376e-06, "loss": 2.6057, "step": 11242 }, { "epoch": 0.9218754803996926, "grad_norm": 0.312775582075119, "learning_rate": 1.6633045986563677e-06, "loss": 2.6327, "step": 11244 }, { "epoch": 0.922039456828081, "grad_norm": 0.3176496922969818, "learning_rate": 1.6563766083140309e-06, "loss": 2.6801, "step": 11246 }, { "epoch": 0.9222034332564694, "grad_norm": 0.33674001693725586, "learning_rate": 1.649462833300519e-06, "loss": 2.6603, "step": 11248 }, { "epoch": 0.9223674096848578, "grad_norm": 0.3119502365589142, "learning_rate": 1.6425632756488063e-06, "loss": 2.6653, "step": 11250 }, { "epoch": 0.9225313861132463, "grad_norm": 0.30792686343193054, "learning_rate": 1.6356779373877086e-06, "loss": 2.6709, "step": 11252 }, { "epoch": 0.9226953625416346, "grad_norm": 0.3173947036266327, "learning_rate": 1.6288068205418228e-06, "loss": 2.6457, "step": 11254 }, { "epoch": 0.922859338970023, "grad_norm": 0.3190482556819916, "learning_rate": 1.621949927131594e-06, "loss": 2.6648, "step": 11256 }, { "epoch": 0.9230233153984114, "grad_norm": 0.31699302792549133, "learning_rate": 1.6151072591732762e-06, "loss": 2.6714, "step": 11258 }, { "epoch": 0.9231872918267999, "grad_norm": 0.31023791432380676, "learning_rate": 1.6082788186789487e-06, "loss": 2.6642, "step": 11260 }, { "epoch": 0.9233512682551883, "grad_norm": 0.3155538737773895, "learning_rate": 1.6014646076564777e-06, "loss": 2.6554, "step": 11262 }, { "epoch": 0.9235152446835767, "grad_norm": 0.329487144947052, "learning_rate": 1.5946646281095823e-06, "loss": 2.6312, "step": 11264 }, { "epoch": 0.9236792211119652, "grad_norm": 0.3233356475830078, "learning_rate": 1.5878788820377744e-06, "loss": 2.672, "step": 11266 }, { "epoch": 0.9238431975403536, "grad_norm": 0.31206730008125305, "learning_rate": 1.5811073714363856e-06, "loss": 2.6522, "step": 11268 }, { "epoch": 0.924007173968742, "grad_norm": 0.3276565968990326, "learning_rate": 1.5743500982965676e-06, "loss": 2.6738, "step": 11270 }, { "epoch": 0.9241711503971304, "grad_norm": 0.3315548896789551, "learning_rate": 1.5676070646052754e-06, "loss": 2.7152, "step": 11272 }, { "epoch": 0.9243351268255189, "grad_norm": 0.31984102725982666, "learning_rate": 1.5608782723452842e-06, "loss": 2.6493, "step": 11274 }, { "epoch": 0.9244991032539073, "grad_norm": 0.32133135199546814, "learning_rate": 1.5541637234951779e-06, "loss": 2.6556, "step": 11276 }, { "epoch": 0.9246630796822957, "grad_norm": 0.31586137413978577, "learning_rate": 1.5474634200293659e-06, "loss": 2.7183, "step": 11278 }, { "epoch": 0.924827056110684, "grad_norm": 0.3145774006843567, "learning_rate": 1.540777363918039e-06, "loss": 2.657, "step": 11280 }, { "epoch": 0.9249910325390726, "grad_norm": 0.31683436036109924, "learning_rate": 1.5341055571272188e-06, "loss": 2.6902, "step": 11282 }, { "epoch": 0.925155008967461, "grad_norm": 0.31885311007499695, "learning_rate": 1.5274480016187475e-06, "loss": 2.6379, "step": 11284 }, { "epoch": 0.9253189853958493, "grad_norm": 0.3152226209640503, "learning_rate": 1.5208046993502478e-06, "loss": 2.6724, "step": 11286 }, { "epoch": 0.9254829618242377, "grad_norm": 0.3215556740760803, "learning_rate": 1.514175652275185e-06, "loss": 2.6364, "step": 11288 }, { "epoch": 0.9256469382526262, "grad_norm": 0.3138490617275238, "learning_rate": 1.5075608623427994e-06, "loss": 2.6814, "step": 11290 }, { "epoch": 0.9258109146810146, "grad_norm": 0.30410242080688477, "learning_rate": 1.5009603314981635e-06, "loss": 2.637, "step": 11292 }, { "epoch": 0.925974891109403, "grad_norm": 0.3354378044605255, "learning_rate": 1.4943740616821356e-06, "loss": 2.6112, "step": 11294 }, { "epoch": 0.9261388675377914, "grad_norm": 0.31590309739112854, "learning_rate": 1.487802054831411e-06, "loss": 2.6465, "step": 11296 }, { "epoch": 0.9263028439661799, "grad_norm": 0.3209439516067505, "learning_rate": 1.4812443128784548e-06, "loss": 2.6574, "step": 11298 }, { "epoch": 0.9264668203945683, "grad_norm": 0.3104178011417389, "learning_rate": 1.4747008377515747e-06, "loss": 2.6672, "step": 11300 }, { "epoch": 0.9266307968229567, "grad_norm": 0.31877386569976807, "learning_rate": 1.4681716313748429e-06, "loss": 2.7127, "step": 11302 }, { "epoch": 0.9267947732513451, "grad_norm": 0.332344114780426, "learning_rate": 1.461656695668173e-06, "loss": 2.6764, "step": 11304 }, { "epoch": 0.9269587496797336, "grad_norm": 0.32066187262535095, "learning_rate": 1.4551560325472612e-06, "loss": 2.7423, "step": 11306 }, { "epoch": 0.927122726108122, "grad_norm": 0.3197905719280243, "learning_rate": 1.4486696439236114e-06, "loss": 2.6696, "step": 11308 }, { "epoch": 0.9272867025365104, "grad_norm": 0.3223975896835327, "learning_rate": 1.442197531704531e-06, "loss": 2.6982, "step": 11310 }, { "epoch": 0.9274506789648987, "grad_norm": 0.32069551944732666, "learning_rate": 1.4357396977931313e-06, "loss": 2.6684, "step": 11312 }, { "epoch": 0.9276146553932872, "grad_norm": 0.3139919638633728, "learning_rate": 1.4292961440883267e-06, "loss": 2.6681, "step": 11314 }, { "epoch": 0.9277786318216756, "grad_norm": 0.3215121030807495, "learning_rate": 1.4228668724848127e-06, "loss": 2.6538, "step": 11316 }, { "epoch": 0.927942608250064, "grad_norm": 0.32299116253852844, "learning_rate": 1.4164518848731267e-06, "loss": 2.7107, "step": 11318 }, { "epoch": 0.9281065846784524, "grad_norm": 0.32958459854125977, "learning_rate": 1.4100511831395602e-06, "loss": 2.6268, "step": 11320 }, { "epoch": 0.9282705611068409, "grad_norm": 0.3229326009750366, "learning_rate": 1.4036647691662353e-06, "loss": 2.6588, "step": 11322 }, { "epoch": 0.9284345375352293, "grad_norm": 0.32700425386428833, "learning_rate": 1.3972926448310553e-06, "loss": 2.66, "step": 11324 }, { "epoch": 0.9285985139636177, "grad_norm": 0.32083988189697266, "learning_rate": 1.3909348120077325e-06, "loss": 2.6687, "step": 11326 }, { "epoch": 0.9287624903920062, "grad_norm": 0.3176613748073578, "learning_rate": 1.3845912725657718e-06, "loss": 2.6329, "step": 11328 }, { "epoch": 0.9289264668203946, "grad_norm": 0.3086274266242981, "learning_rate": 1.378262028370475e-06, "loss": 2.6067, "step": 11330 }, { "epoch": 0.929090443248783, "grad_norm": 0.3170456290245056, "learning_rate": 1.3719470812829483e-06, "loss": 2.6667, "step": 11332 }, { "epoch": 0.9292544196771714, "grad_norm": 0.3041694164276123, "learning_rate": 1.365646433160067e-06, "loss": 2.6751, "step": 11334 }, { "epoch": 0.9294183961055599, "grad_norm": 0.30273258686065674, "learning_rate": 1.3593600858545497e-06, "loss": 2.6488, "step": 11336 }, { "epoch": 0.9295823725339483, "grad_norm": 0.3191751539707184, "learning_rate": 1.3530880412148617e-06, "loss": 2.7162, "step": 11338 }, { "epoch": 0.9297463489623367, "grad_norm": 0.322712779045105, "learning_rate": 1.3468303010852889e-06, "loss": 2.658, "step": 11340 }, { "epoch": 0.929910325390725, "grad_norm": 0.3108297288417816, "learning_rate": 1.3405868673059096e-06, "loss": 2.7084, "step": 11342 }, { "epoch": 0.9300743018191135, "grad_norm": 0.3129449784755707, "learning_rate": 1.3343577417125885e-06, "loss": 2.6463, "step": 11344 }, { "epoch": 0.9302382782475019, "grad_norm": 0.3245464563369751, "learning_rate": 1.3281429261369772e-06, "loss": 2.6797, "step": 11346 }, { "epoch": 0.9304022546758903, "grad_norm": 0.3139954209327698, "learning_rate": 1.3219424224065412e-06, "loss": 2.6336, "step": 11348 }, { "epoch": 0.9305662311042787, "grad_norm": 0.324217289686203, "learning_rate": 1.3157562323445116e-06, "loss": 2.66, "step": 11350 }, { "epoch": 0.9307302075326672, "grad_norm": 0.31187063455581665, "learning_rate": 1.3095843577699329e-06, "loss": 2.7267, "step": 11352 }, { "epoch": 0.9308941839610556, "grad_norm": 0.3174886703491211, "learning_rate": 1.3034268004976258e-06, "loss": 2.6486, "step": 11354 }, { "epoch": 0.931058160389444, "grad_norm": 0.31796005368232727, "learning_rate": 1.2972835623382028e-06, "loss": 2.6316, "step": 11356 }, { "epoch": 0.9312221368178324, "grad_norm": 0.3064802885055542, "learning_rate": 1.2911546450980694e-06, "loss": 2.587, "step": 11358 }, { "epoch": 0.9313861132462209, "grad_norm": 0.31813356280326843, "learning_rate": 1.285040050579428e-06, "loss": 2.6935, "step": 11360 }, { "epoch": 0.9315500896746093, "grad_norm": 0.31423598527908325, "learning_rate": 1.2789397805802517e-06, "loss": 2.6784, "step": 11362 }, { "epoch": 0.9317140661029977, "grad_norm": 0.3162352740764618, "learning_rate": 1.2728538368943054e-06, "loss": 2.5995, "step": 11364 }, { "epoch": 0.9318780425313861, "grad_norm": 0.3179550766944885, "learning_rate": 1.266782221311169e-06, "loss": 2.6306, "step": 11366 }, { "epoch": 0.9320420189597746, "grad_norm": 0.31694236397743225, "learning_rate": 1.2607249356161644e-06, "loss": 2.6738, "step": 11368 }, { "epoch": 0.932205995388163, "grad_norm": 0.33349114656448364, "learning_rate": 1.2546819815904277e-06, "loss": 2.6419, "step": 11370 }, { "epoch": 0.9323699718165513, "grad_norm": 0.3220036029815674, "learning_rate": 1.2486533610108764e-06, "loss": 2.6227, "step": 11372 }, { "epoch": 0.9325339482449397, "grad_norm": 0.30876314640045166, "learning_rate": 1.2426390756502203e-06, "loss": 2.6377, "step": 11374 }, { "epoch": 0.9326979246733282, "grad_norm": 0.3193575143814087, "learning_rate": 1.236639127276934e-06, "loss": 2.6515, "step": 11376 }, { "epoch": 0.9328619011017166, "grad_norm": 0.3227860927581787, "learning_rate": 1.2306535176552947e-06, "loss": 2.7062, "step": 11378 }, { "epoch": 0.933025877530105, "grad_norm": 0.3084670603275299, "learning_rate": 1.2246822485453557e-06, "loss": 2.6721, "step": 11380 }, { "epoch": 0.9331898539584935, "grad_norm": 0.320641428232193, "learning_rate": 1.2187253217029515e-06, "loss": 2.659, "step": 11382 }, { "epoch": 0.9333538303868819, "grad_norm": 0.31296634674072266, "learning_rate": 1.2127827388797086e-06, "loss": 2.6929, "step": 11384 }, { "epoch": 0.9335178068152703, "grad_norm": 0.31565678119659424, "learning_rate": 1.2068545018230236e-06, "loss": 2.7138, "step": 11386 }, { "epoch": 0.9336817832436587, "grad_norm": 0.31796878576278687, "learning_rate": 1.2009406122760802e-06, "loss": 2.5898, "step": 11388 }, { "epoch": 0.9338457596720472, "grad_norm": 0.3097228407859802, "learning_rate": 1.1950410719778483e-06, "loss": 2.6352, "step": 11390 }, { "epoch": 0.9340097361004356, "grad_norm": 0.3197804093360901, "learning_rate": 1.1891558826630788e-06, "loss": 2.6556, "step": 11392 }, { "epoch": 0.934173712528824, "grad_norm": 0.3061419725418091, "learning_rate": 1.183285046062288e-06, "loss": 2.6599, "step": 11394 }, { "epoch": 0.9343376889572124, "grad_norm": 0.3058796226978302, "learning_rate": 1.1774285639017779e-06, "loss": 2.7197, "step": 11396 }, { "epoch": 0.9345016653856009, "grad_norm": 0.3441823124885559, "learning_rate": 1.1715864379036435e-06, "loss": 2.6677, "step": 11398 }, { "epoch": 0.9346656418139893, "grad_norm": 0.31811004877090454, "learning_rate": 1.1657586697857493e-06, "loss": 2.6396, "step": 11400 }, { "epoch": 0.9348296182423776, "grad_norm": 0.30890345573425293, "learning_rate": 1.15994526126173e-06, "loss": 2.5966, "step": 11402 }, { "epoch": 0.934993594670766, "grad_norm": 0.3229040801525116, "learning_rate": 1.1541462140410074e-06, "loss": 2.6152, "step": 11404 }, { "epoch": 0.9351575710991545, "grad_norm": 0.3088882863521576, "learning_rate": 1.148361529828773e-06, "loss": 2.6628, "step": 11406 }, { "epoch": 0.9353215475275429, "grad_norm": 0.31260162591934204, "learning_rate": 1.1425912103260107e-06, "loss": 2.6544, "step": 11408 }, { "epoch": 0.9354855239559313, "grad_norm": 0.3307812511920929, "learning_rate": 1.1368352572294627e-06, "loss": 2.6412, "step": 11410 }, { "epoch": 0.9356495003843197, "grad_norm": 0.3134661614894867, "learning_rate": 1.1310936722316423e-06, "loss": 2.674, "step": 11412 }, { "epoch": 0.9358134768127082, "grad_norm": 0.3169810473918915, "learning_rate": 1.1253664570208711e-06, "loss": 2.6534, "step": 11414 }, { "epoch": 0.9359774532410966, "grad_norm": 0.3222421705722809, "learning_rate": 1.119653613281213e-06, "loss": 2.6294, "step": 11416 }, { "epoch": 0.936141429669485, "grad_norm": 0.3155307173728943, "learning_rate": 1.1139551426925076e-06, "loss": 2.6865, "step": 11418 }, { "epoch": 0.9363054060978734, "grad_norm": 0.3088846504688263, "learning_rate": 1.1082710469303925e-06, "loss": 2.6026, "step": 11420 }, { "epoch": 0.9364693825262619, "grad_norm": 0.31774112582206726, "learning_rate": 1.1026013276662527e-06, "loss": 2.6684, "step": 11422 }, { "epoch": 0.9366333589546503, "grad_norm": 0.31304025650024414, "learning_rate": 1.096945986567255e-06, "loss": 2.6339, "step": 11424 }, { "epoch": 0.9367973353830387, "grad_norm": 0.31352925300598145, "learning_rate": 1.0913050252963463e-06, "loss": 2.6655, "step": 11426 }, { "epoch": 0.9369613118114271, "grad_norm": 0.30806317925453186, "learning_rate": 1.0856784455122337e-06, "loss": 2.6584, "step": 11428 }, { "epoch": 0.9371252882398156, "grad_norm": 0.310808926820755, "learning_rate": 1.0800662488693936e-06, "loss": 2.6785, "step": 11430 }, { "epoch": 0.937289264668204, "grad_norm": 0.3152698576450348, "learning_rate": 1.0744684370180945e-06, "loss": 2.6909, "step": 11432 }, { "epoch": 0.9374532410965923, "grad_norm": 0.30823028087615967, "learning_rate": 1.068885011604348e-06, "loss": 2.6819, "step": 11434 }, { "epoch": 0.9376172175249808, "grad_norm": 0.3168531358242035, "learning_rate": 1.0633159742699517e-06, "loss": 2.6289, "step": 11436 }, { "epoch": 0.9377811939533692, "grad_norm": 0.30955854058265686, "learning_rate": 1.0577613266524622e-06, "loss": 2.6683, "step": 11438 }, { "epoch": 0.9379451703817576, "grad_norm": 0.30956169962882996, "learning_rate": 1.0522210703852175e-06, "loss": 2.7246, "step": 11440 }, { "epoch": 0.938109146810146, "grad_norm": 0.31866222620010376, "learning_rate": 1.0466952070973147e-06, "loss": 2.6612, "step": 11442 }, { "epoch": 0.9382731232385345, "grad_norm": 0.31868621706962585, "learning_rate": 1.0411837384136203e-06, "loss": 2.6739, "step": 11444 }, { "epoch": 0.9384370996669229, "grad_norm": 0.31372714042663574, "learning_rate": 1.0356866659547714e-06, "loss": 2.6666, "step": 11446 }, { "epoch": 0.9386010760953113, "grad_norm": 0.306061714887619, "learning_rate": 1.0302039913371586e-06, "loss": 2.6244, "step": 11448 }, { "epoch": 0.9387650525236997, "grad_norm": 0.3235943615436554, "learning_rate": 1.0247357161729697e-06, "loss": 2.6721, "step": 11450 }, { "epoch": 0.9389290289520882, "grad_norm": 0.31293201446533203, "learning_rate": 1.0192818420701133e-06, "loss": 2.6512, "step": 11452 }, { "epoch": 0.9390930053804766, "grad_norm": 0.3098869025707245, "learning_rate": 1.0138423706323119e-06, "loss": 2.6621, "step": 11454 }, { "epoch": 0.939256981808865, "grad_norm": 0.3194812834262848, "learning_rate": 1.0084173034590084e-06, "loss": 2.6643, "step": 11456 }, { "epoch": 0.9394209582372534, "grad_norm": 0.31623008847236633, "learning_rate": 1.0030066421454543e-06, "loss": 2.6723, "step": 11458 }, { "epoch": 0.9395849346656419, "grad_norm": 0.3192461133003235, "learning_rate": 9.976103882826104e-07, "loss": 2.6759, "step": 11460 }, { "epoch": 0.9397489110940302, "grad_norm": 0.31996315717697144, "learning_rate": 9.922285434572687e-07, "loss": 2.6749, "step": 11462 }, { "epoch": 0.9399128875224186, "grad_norm": 0.32166633009910583, "learning_rate": 9.868611092519187e-07, "loss": 2.6875, "step": 11464 }, { "epoch": 0.940076863950807, "grad_norm": 0.31192925572395325, "learning_rate": 9.81508087244859e-07, "loss": 2.6142, "step": 11466 }, { "epoch": 0.9402408403791955, "grad_norm": 0.31853535771369934, "learning_rate": 9.76169479010125e-07, "loss": 2.6823, "step": 11468 }, { "epoch": 0.9404048168075839, "grad_norm": 0.3121114671230316, "learning_rate": 9.708452861175278e-07, "loss": 2.6573, "step": 11470 }, { "epoch": 0.9405687932359723, "grad_norm": 0.3293512165546417, "learning_rate": 9.655355101326258e-07, "loss": 2.6665, "step": 11472 }, { "epoch": 0.9407327696643607, "grad_norm": 0.3145938217639923, "learning_rate": 9.60240152616748e-07, "loss": 2.6715, "step": 11474 }, { "epoch": 0.9408967460927492, "grad_norm": 0.3192720115184784, "learning_rate": 9.549592151269937e-07, "loss": 2.6899, "step": 11476 }, { "epoch": 0.9410607225211376, "grad_norm": 0.3051421344280243, "learning_rate": 9.49692699216187e-07, "loss": 2.6796, "step": 11478 }, { "epoch": 0.941224698949526, "grad_norm": 0.31059160828590393, "learning_rate": 9.444406064329613e-07, "loss": 2.6302, "step": 11480 }, { "epoch": 0.9413886753779144, "grad_norm": 0.31281107664108276, "learning_rate": 9.392029383216649e-07, "loss": 2.6035, "step": 11482 }, { "epoch": 0.9415526518063029, "grad_norm": 0.31089380383491516, "learning_rate": 9.339796964224268e-07, "loss": 2.6751, "step": 11484 }, { "epoch": 0.9417166282346913, "grad_norm": 0.305430144071579, "learning_rate": 9.287708822711239e-07, "loss": 2.6093, "step": 11486 }, { "epoch": 0.9418806046630797, "grad_norm": 0.3031880557537079, "learning_rate": 9.235764973994088e-07, "loss": 2.6253, "step": 11488 }, { "epoch": 0.942044581091468, "grad_norm": 0.3189202845096588, "learning_rate": 9.183965433346598e-07, "loss": 2.5981, "step": 11490 }, { "epoch": 0.9422085575198565, "grad_norm": 0.3167671859264374, "learning_rate": 9.132310216000472e-07, "loss": 2.6197, "step": 11492 }, { "epoch": 0.9423725339482449, "grad_norm": 0.311166375875473, "learning_rate": 9.08079933714473e-07, "loss": 2.6203, "step": 11494 }, { "epoch": 0.9425365103766333, "grad_norm": 0.31139346957206726, "learning_rate": 9.029432811925919e-07, "loss": 2.6999, "step": 11496 }, { "epoch": 0.9427004868050218, "grad_norm": 0.33040741086006165, "learning_rate": 8.97821065544846e-07, "loss": 2.6742, "step": 11498 }, { "epoch": 0.9428644632334102, "grad_norm": 0.322986364364624, "learning_rate": 8.92713288277397e-07, "loss": 2.6691, "step": 11500 }, { "epoch": 0.9430284396617986, "grad_norm": 0.3197978436946869, "learning_rate": 8.876199508921768e-07, "loss": 2.6681, "step": 11502 }, { "epoch": 0.943192416090187, "grad_norm": 0.3120407164096832, "learning_rate": 8.825410548868762e-07, "loss": 2.6395, "step": 11504 }, { "epoch": 0.9433563925185755, "grad_norm": 0.3180319368839264, "learning_rate": 8.77476601754923e-07, "loss": 2.6234, "step": 11506 }, { "epoch": 0.9435203689469639, "grad_norm": 0.31533265113830566, "learning_rate": 8.724265929855091e-07, "loss": 2.6093, "step": 11508 }, { "epoch": 0.9436843453753523, "grad_norm": 0.31990543007850647, "learning_rate": 8.673910300635857e-07, "loss": 2.6694, "step": 11510 }, { "epoch": 0.9438483218037407, "grad_norm": 0.31744059920310974, "learning_rate": 8.623699144698349e-07, "loss": 2.6776, "step": 11512 }, { "epoch": 0.9440122982321292, "grad_norm": 0.3098289966583252, "learning_rate": 8.573632476807148e-07, "loss": 2.6381, "step": 11514 }, { "epoch": 0.9441762746605176, "grad_norm": 0.3186374306678772, "learning_rate": 8.523710311684252e-07, "loss": 2.6759, "step": 11516 }, { "epoch": 0.944340251088906, "grad_norm": 0.32661184668540955, "learning_rate": 8.473932664009088e-07, "loss": 2.6943, "step": 11518 }, { "epoch": 0.9445042275172943, "grad_norm": 0.3162136673927307, "learning_rate": 8.424299548418668e-07, "loss": 2.6864, "step": 11520 }, { "epoch": 0.9446682039456828, "grad_norm": 0.31532377004623413, "learning_rate": 8.374810979507541e-07, "loss": 2.6459, "step": 11522 }, { "epoch": 0.9448321803740712, "grad_norm": 0.31813204288482666, "learning_rate": 8.325466971827677e-07, "loss": 2.6434, "step": 11524 }, { "epoch": 0.9449961568024596, "grad_norm": 0.3034104108810425, "learning_rate": 8.276267539888471e-07, "loss": 2.5916, "step": 11526 }, { "epoch": 0.945160133230848, "grad_norm": 0.3112906813621521, "learning_rate": 8.227212698157072e-07, "loss": 2.7007, "step": 11528 }, { "epoch": 0.9453241096592365, "grad_norm": 0.30479559302330017, "learning_rate": 8.178302461057829e-07, "loss": 2.6373, "step": 11530 }, { "epoch": 0.9454880860876249, "grad_norm": 0.3049776554107666, "learning_rate": 8.129536842972741e-07, "loss": 2.6188, "step": 11532 }, { "epoch": 0.9456520625160133, "grad_norm": 0.31825312972068787, "learning_rate": 8.080915858241112e-07, "loss": 2.6194, "step": 11534 }, { "epoch": 0.9458160389444017, "grad_norm": 0.30667853355407715, "learning_rate": 8.032439521160007e-07, "loss": 2.6173, "step": 11536 }, { "epoch": 0.9459800153727902, "grad_norm": 0.32208767533302307, "learning_rate": 7.984107845983634e-07, "loss": 2.6559, "step": 11538 }, { "epoch": 0.9461439918011786, "grad_norm": 0.3034205436706543, "learning_rate": 7.935920846923849e-07, "loss": 2.6157, "step": 11540 }, { "epoch": 0.946307968229567, "grad_norm": 0.31524085998535156, "learning_rate": 7.887878538149984e-07, "loss": 2.6004, "step": 11542 }, { "epoch": 0.9464719446579554, "grad_norm": 0.31821316480636597, "learning_rate": 7.839980933788627e-07, "loss": 2.6609, "step": 11544 }, { "epoch": 0.9466359210863439, "grad_norm": 0.3100659251213074, "learning_rate": 7.792228047924122e-07, "loss": 2.612, "step": 11546 }, { "epoch": 0.9467998975147323, "grad_norm": 0.3110487163066864, "learning_rate": 7.744619894598015e-07, "loss": 2.6629, "step": 11548 }, { "epoch": 0.9469638739431206, "grad_norm": 0.3124012053012848, "learning_rate": 7.697156487809332e-07, "loss": 2.6793, "step": 11550 }, { "epoch": 0.9471278503715092, "grad_norm": 0.30758899450302124, "learning_rate": 7.649837841514574e-07, "loss": 2.6131, "step": 11552 }, { "epoch": 0.9472918267998975, "grad_norm": 0.32231613993644714, "learning_rate": 7.602663969627832e-07, "loss": 2.6823, "step": 11554 }, { "epoch": 0.9474558032282859, "grad_norm": 0.31516873836517334, "learning_rate": 7.555634886020291e-07, "loss": 2.7002, "step": 11556 }, { "epoch": 0.9476197796566743, "grad_norm": 0.3096575438976288, "learning_rate": 7.508750604520831e-07, "loss": 2.6907, "step": 11558 }, { "epoch": 0.9477837560850628, "grad_norm": 0.3314553499221802, "learning_rate": 7.462011138915592e-07, "loss": 2.6749, "step": 11560 }, { "epoch": 0.9479477325134512, "grad_norm": 0.30644360184669495, "learning_rate": 7.415416502948303e-07, "loss": 2.6491, "step": 11562 }, { "epoch": 0.9481117089418396, "grad_norm": 0.310377299785614, "learning_rate": 7.368966710320003e-07, "loss": 2.6423, "step": 11564 }, { "epoch": 0.948275685370228, "grad_norm": 0.31963488459587097, "learning_rate": 7.322661774689044e-07, "loss": 2.6566, "step": 11566 }, { "epoch": 0.9484396617986165, "grad_norm": 0.32211312651634216, "learning_rate": 7.276501709671313e-07, "loss": 2.6683, "step": 11568 }, { "epoch": 0.9486036382270049, "grad_norm": 0.3100370764732361, "learning_rate": 7.230486528840175e-07, "loss": 2.673, "step": 11570 }, { "epoch": 0.9487676146553933, "grad_norm": 0.32249924540519714, "learning_rate": 7.184616245726195e-07, "loss": 2.6561, "step": 11572 }, { "epoch": 0.9489315910837817, "grad_norm": 0.31462588906288147, "learning_rate": 7.138890873817361e-07, "loss": 2.6517, "step": 11574 }, { "epoch": 0.9490955675121702, "grad_norm": 0.30535271763801575, "learning_rate": 7.093310426559252e-07, "loss": 2.6821, "step": 11576 }, { "epoch": 0.9492595439405586, "grad_norm": 0.3183198571205139, "learning_rate": 7.047874917354647e-07, "loss": 2.6463, "step": 11578 }, { "epoch": 0.949423520368947, "grad_norm": 0.313385546207428, "learning_rate": 7.002584359563691e-07, "loss": 2.6804, "step": 11580 }, { "epoch": 0.9495874967973353, "grad_norm": 0.31047648191452026, "learning_rate": 6.95743876650401e-07, "loss": 2.6401, "step": 11582 }, { "epoch": 0.9497514732257238, "grad_norm": 0.30423569679260254, "learning_rate": 6.91243815145054e-07, "loss": 2.6533, "step": 11584 }, { "epoch": 0.9499154496541122, "grad_norm": 0.3242630064487457, "learning_rate": 6.867582527635641e-07, "loss": 2.7103, "step": 11586 }, { "epoch": 0.9500794260825006, "grad_norm": 0.31865593791007996, "learning_rate": 6.822871908248929e-07, "loss": 2.7321, "step": 11588 }, { "epoch": 0.950243402510889, "grad_norm": 0.3137083351612091, "learning_rate": 6.77830630643761e-07, "loss": 2.6669, "step": 11590 }, { "epoch": 0.9504073789392775, "grad_norm": 0.31904491782188416, "learning_rate": 6.733885735305867e-07, "loss": 2.6614, "step": 11592 }, { "epoch": 0.9505713553676659, "grad_norm": 0.31024643778800964, "learning_rate": 6.689610207915698e-07, "loss": 2.6771, "step": 11594 }, { "epoch": 0.9507353317960543, "grad_norm": 0.3154480755329132, "learning_rate": 6.645479737286076e-07, "loss": 2.6334, "step": 11596 }, { "epoch": 0.9508993082244427, "grad_norm": 0.31415608525276184, "learning_rate": 6.601494336393454e-07, "loss": 2.6789, "step": 11598 }, { "epoch": 0.9510632846528312, "grad_norm": 0.3116513192653656, "learning_rate": 6.55765401817171e-07, "loss": 2.6893, "step": 11600 }, { "epoch": 0.9512272610812196, "grad_norm": 0.3124915361404419, "learning_rate": 6.513958795512032e-07, "loss": 2.6424, "step": 11602 }, { "epoch": 0.951391237509608, "grad_norm": 0.304374635219574, "learning_rate": 6.470408681262752e-07, "loss": 2.6022, "step": 11604 }, { "epoch": 0.9515552139379964, "grad_norm": 0.3110538125038147, "learning_rate": 6.427003688229738e-07, "loss": 2.6233, "step": 11606 }, { "epoch": 0.9517191903663849, "grad_norm": 0.31863468885421753, "learning_rate": 6.38374382917617e-07, "loss": 2.6396, "step": 11608 }, { "epoch": 0.9518831667947732, "grad_norm": 0.3147267997264862, "learning_rate": 6.340629116822427e-07, "loss": 2.5922, "step": 11610 }, { "epoch": 0.9520471432231616, "grad_norm": 0.3161526620388031, "learning_rate": 6.29765956384637e-07, "loss": 2.6708, "step": 11612 }, { "epoch": 0.9522111196515501, "grad_norm": 0.3212495446205139, "learning_rate": 6.254835182883057e-07, "loss": 2.634, "step": 11614 }, { "epoch": 0.9523750960799385, "grad_norm": 0.3099731504917145, "learning_rate": 6.212155986524859e-07, "loss": 2.6035, "step": 11616 }, { "epoch": 0.9525390725083269, "grad_norm": 0.3139784038066864, "learning_rate": 6.169621987321572e-07, "loss": 2.6615, "step": 11618 }, { "epoch": 0.9527030489367153, "grad_norm": 0.30947667360305786, "learning_rate": 6.127233197780246e-07, "loss": 2.6298, "step": 11620 }, { "epoch": 0.9528670253651038, "grad_norm": 0.3213445842266083, "learning_rate": 6.084989630365024e-07, "loss": 2.6811, "step": 11622 }, { "epoch": 0.9530310017934922, "grad_norm": 0.3112287223339081, "learning_rate": 6.042891297497688e-07, "loss": 2.6604, "step": 11624 }, { "epoch": 0.9531949782218806, "grad_norm": 0.3198995888233185, "learning_rate": 6.000938211557117e-07, "loss": 2.6248, "step": 11626 }, { "epoch": 0.953358954650269, "grad_norm": 0.3031315207481384, "learning_rate": 5.959130384879497e-07, "loss": 2.6593, "step": 11628 }, { "epoch": 0.9535229310786575, "grad_norm": 0.31492289900779724, "learning_rate": 5.917467829758384e-07, "loss": 2.633, "step": 11630 }, { "epoch": 0.9536869075070459, "grad_norm": 0.31080785393714905, "learning_rate": 5.875950558444476e-07, "loss": 2.6563, "step": 11632 }, { "epoch": 0.9538508839354343, "grad_norm": 0.30723899602890015, "learning_rate": 5.834578583145845e-07, "loss": 2.6336, "step": 11634 }, { "epoch": 0.9540148603638227, "grad_norm": 0.31835001707077026, "learning_rate": 5.793351916027811e-07, "loss": 2.6482, "step": 11636 }, { "epoch": 0.9541788367922112, "grad_norm": 0.3076898753643036, "learning_rate": 5.75227056921307e-07, "loss": 2.5823, "step": 11638 }, { "epoch": 0.9543428132205996, "grad_norm": 0.30599820613861084, "learning_rate": 5.711334554781345e-07, "loss": 2.6537, "step": 11640 }, { "epoch": 0.9545067896489879, "grad_norm": 0.3103514611721039, "learning_rate": 5.670543884769952e-07, "loss": 2.6508, "step": 11642 }, { "epoch": 0.9546707660773763, "grad_norm": 0.3100859224796295, "learning_rate": 5.629898571173131e-07, "loss": 2.645, "step": 11644 }, { "epoch": 0.9548347425057648, "grad_norm": 0.30624717473983765, "learning_rate": 5.589398625942654e-07, "loss": 2.6563, "step": 11646 }, { "epoch": 0.9549987189341532, "grad_norm": 0.3044147789478302, "learning_rate": 5.54904406098733e-07, "loss": 2.5818, "step": 11648 }, { "epoch": 0.9551626953625416, "grad_norm": 0.30894699692726135, "learning_rate": 5.508834888173441e-07, "loss": 2.7046, "step": 11650 }, { "epoch": 0.95532667179093, "grad_norm": 0.31238916516304016, "learning_rate": 5.468771119324312e-07, "loss": 2.6396, "step": 11652 }, { "epoch": 0.9554906482193185, "grad_norm": 0.3088591396808624, "learning_rate": 5.428852766220627e-07, "loss": 2.6236, "step": 11654 }, { "epoch": 0.9556546246477069, "grad_norm": 0.3266092538833618, "learning_rate": 5.38907984060033e-07, "loss": 2.6717, "step": 11656 }, { "epoch": 0.9558186010760953, "grad_norm": 0.3095443546772003, "learning_rate": 5.349452354158458e-07, "loss": 2.6222, "step": 11658 }, { "epoch": 0.9559825775044837, "grad_norm": 0.3061227798461914, "learning_rate": 5.309970318547464e-07, "loss": 2.6083, "step": 11660 }, { "epoch": 0.9561465539328722, "grad_norm": 0.30400049686431885, "learning_rate": 5.270633745376897e-07, "loss": 2.6552, "step": 11662 }, { "epoch": 0.9563105303612606, "grad_norm": 0.31368958950042725, "learning_rate": 5.231442646213613e-07, "loss": 2.6778, "step": 11664 }, { "epoch": 0.956474506789649, "grad_norm": 0.3110884726047516, "learning_rate": 5.192397032581676e-07, "loss": 2.6376, "step": 11666 }, { "epoch": 0.9566384832180375, "grad_norm": 0.3314817249774933, "learning_rate": 5.153496915962341e-07, "loss": 2.7156, "step": 11668 }, { "epoch": 0.9568024596464259, "grad_norm": 0.31033384799957275, "learning_rate": 5.114742307794073e-07, "loss": 2.6823, "step": 11670 }, { "epoch": 0.9569664360748142, "grad_norm": 0.3176211714744568, "learning_rate": 5.076133219472589e-07, "loss": 2.6071, "step": 11672 }, { "epoch": 0.9571304125032026, "grad_norm": 0.30577701330184937, "learning_rate": 5.037669662350808e-07, "loss": 2.6285, "step": 11674 }, { "epoch": 0.9572943889315911, "grad_norm": 0.3181045651435852, "learning_rate": 4.999351647738848e-07, "loss": 2.6475, "step": 11676 }, { "epoch": 0.9574583653599795, "grad_norm": 0.3024066984653473, "learning_rate": 4.96117918690403e-07, "loss": 2.6123, "step": 11678 }, { "epoch": 0.9576223417883679, "grad_norm": 0.3198937475681305, "learning_rate": 4.923152291070876e-07, "loss": 2.6354, "step": 11680 }, { "epoch": 0.9577863182167563, "grad_norm": 0.3103814423084259, "learning_rate": 4.885270971421052e-07, "loss": 2.59, "step": 11682 }, { "epoch": 0.9579502946451448, "grad_norm": 0.3161482810974121, "learning_rate": 4.847535239093593e-07, "loss": 2.7422, "step": 11684 }, { "epoch": 0.9581142710735332, "grad_norm": 0.30970585346221924, "learning_rate": 4.80994510518451e-07, "loss": 2.6655, "step": 11686 }, { "epoch": 0.9582782475019216, "grad_norm": 0.32038748264312744, "learning_rate": 4.772500580747074e-07, "loss": 2.6136, "step": 11688 }, { "epoch": 0.95844222393031, "grad_norm": 0.31170517206192017, "learning_rate": 4.735201676791812e-07, "loss": 2.6544, "step": 11690 }, { "epoch": 0.9586062003586985, "grad_norm": 0.3081343173980713, "learning_rate": 4.6980484042863924e-07, "loss": 2.5707, "step": 11692 }, { "epoch": 0.9587701767870869, "grad_norm": 0.3072557747364044, "learning_rate": 4.661040774155634e-07, "loss": 2.6516, "step": 11694 }, { "epoch": 0.9589341532154753, "grad_norm": 0.3171069324016571, "learning_rate": 4.6241787972814977e-07, "loss": 2.702, "step": 11696 }, { "epoch": 0.9590981296438637, "grad_norm": 0.3147324323654175, "learning_rate": 4.587462484503202e-07, "loss": 2.6178, "step": 11698 }, { "epoch": 0.9592621060722522, "grad_norm": 0.3084080219268799, "learning_rate": 4.550891846617111e-07, "loss": 2.6147, "step": 11700 }, { "epoch": 0.9594260825006405, "grad_norm": 0.31589874625205994, "learning_rate": 4.514466894376679e-07, "loss": 2.6193, "step": 11702 }, { "epoch": 0.9595900589290289, "grad_norm": 0.3130813241004944, "learning_rate": 4.4781876384926145e-07, "loss": 2.6905, "step": 11704 }, { "epoch": 0.9597540353574173, "grad_norm": 0.30354851484298706, "learning_rate": 4.4420540896327747e-07, "loss": 2.6913, "step": 11706 }, { "epoch": 0.9599180117858058, "grad_norm": 0.3142586946487427, "learning_rate": 4.406066258422104e-07, "loss": 2.651, "step": 11708 }, { "epoch": 0.9600819882141942, "grad_norm": 0.3102916181087494, "learning_rate": 4.370224155442693e-07, "loss": 2.6597, "step": 11710 }, { "epoch": 0.9602459646425826, "grad_norm": 0.3007727265357971, "learning_rate": 4.334527791233889e-07, "loss": 2.6267, "step": 11712 }, { "epoch": 0.960409941070971, "grad_norm": 0.31869107484817505, "learning_rate": 4.2989771762921295e-07, "loss": 2.6527, "step": 11714 }, { "epoch": 0.9605739174993595, "grad_norm": 0.3067484498023987, "learning_rate": 4.2635723210709967e-07, "loss": 2.6487, "step": 11716 }, { "epoch": 0.9607378939277479, "grad_norm": 0.31211501359939575, "learning_rate": 4.2283132359812185e-07, "loss": 2.6925, "step": 11718 }, { "epoch": 0.9609018703561363, "grad_norm": 0.3148888945579529, "learning_rate": 4.1931999313905565e-07, "loss": 2.6747, "step": 11720 }, { "epoch": 0.9610658467845248, "grad_norm": 0.3199276030063629, "learning_rate": 4.1582324176240306e-07, "loss": 2.6714, "step": 11722 }, { "epoch": 0.9612298232129132, "grad_norm": 0.30706924200057983, "learning_rate": 4.123410704963804e-07, "loss": 2.6414, "step": 11724 }, { "epoch": 0.9613937996413016, "grad_norm": 0.31506243348121643, "learning_rate": 4.08873480364913e-07, "loss": 2.6935, "step": 11726 }, { "epoch": 0.96155777606969, "grad_norm": 0.3069307804107666, "learning_rate": 4.054204723876298e-07, "loss": 2.6539, "step": 11728 }, { "epoch": 0.9617217524980785, "grad_norm": 0.30957096815109253, "learning_rate": 4.0198204757988523e-07, "loss": 2.6598, "step": 11730 }, { "epoch": 0.9618857289264668, "grad_norm": 0.3129814863204956, "learning_rate": 3.9855820695273715e-07, "loss": 2.6666, "step": 11732 }, { "epoch": 0.9620497053548552, "grad_norm": 0.31843897700309753, "learning_rate": 3.9514895151295807e-07, "loss": 2.6704, "step": 11734 }, { "epoch": 0.9622136817832436, "grad_norm": 0.31878137588500977, "learning_rate": 3.917542822630349e-07, "loss": 2.6761, "step": 11736 }, { "epoch": 0.9623776582116321, "grad_norm": 0.30806148052215576, "learning_rate": 3.883742002011581e-07, "loss": 2.6966, "step": 11738 }, { "epoch": 0.9625416346400205, "grad_norm": 0.31395968794822693, "learning_rate": 3.850087063212382e-07, "loss": 2.6769, "step": 11740 }, { "epoch": 0.9627056110684089, "grad_norm": 0.30206865072250366, "learning_rate": 3.8165780161288356e-07, "loss": 2.651, "step": 11742 }, { "epoch": 0.9628695874967973, "grad_norm": 0.3107939660549164, "learning_rate": 3.783214870614282e-07, "loss": 2.7045, "step": 11744 }, { "epoch": 0.9630335639251858, "grad_norm": 0.310419499874115, "learning_rate": 3.74999763647893e-07, "loss": 2.6295, "step": 11746 }, { "epoch": 0.9631975403535742, "grad_norm": 0.31614232063293457, "learning_rate": 3.716926323490411e-07, "loss": 2.6682, "step": 11748 }, { "epoch": 0.9633615167819626, "grad_norm": 0.31407174468040466, "learning_rate": 3.684000941373167e-07, "loss": 2.689, "step": 11750 }, { "epoch": 0.963525493210351, "grad_norm": 0.31245312094688416, "learning_rate": 3.6512214998088437e-07, "loss": 2.6996, "step": 11752 }, { "epoch": 0.9636894696387395, "grad_norm": 0.3079874813556671, "learning_rate": 3.618588008436119e-07, "loss": 2.6952, "step": 11754 }, { "epoch": 0.9638534460671279, "grad_norm": 0.3104054927825928, "learning_rate": 3.586100476850873e-07, "loss": 2.6326, "step": 11756 }, { "epoch": 0.9640174224955163, "grad_norm": 0.3092536926269531, "learning_rate": 3.5537589146059647e-07, "loss": 2.6024, "step": 11758 }, { "epoch": 0.9641813989239046, "grad_norm": 0.3117753863334656, "learning_rate": 3.5215633312113417e-07, "loss": 2.6415, "step": 11760 }, { "epoch": 0.9643453753522931, "grad_norm": 0.3105739653110504, "learning_rate": 3.4895137361339867e-07, "loss": 2.7196, "step": 11762 }, { "epoch": 0.9645093517806815, "grad_norm": 0.3109462261199951, "learning_rate": 3.457610138798084e-07, "loss": 2.6807, "step": 11764 }, { "epoch": 0.9646733282090699, "grad_norm": 0.30677148699760437, "learning_rate": 3.4258525485848514e-07, "loss": 2.6236, "step": 11766 }, { "epoch": 0.9648373046374583, "grad_norm": 0.31321364641189575, "learning_rate": 3.394240974832374e-07, "loss": 2.6852, "step": 11768 }, { "epoch": 0.9650012810658468, "grad_norm": 0.31202778220176697, "learning_rate": 3.3627754268361067e-07, "loss": 2.6813, "step": 11770 }, { "epoch": 0.9651652574942352, "grad_norm": 0.30900079011917114, "learning_rate": 3.331455913848369e-07, "loss": 2.6424, "step": 11772 }, { "epoch": 0.9653292339226236, "grad_norm": 0.3165871500968933, "learning_rate": 3.3002824450785727e-07, "loss": 2.6699, "step": 11774 }, { "epoch": 0.965493210351012, "grad_norm": 0.3114628791809082, "learning_rate": 3.269255029693219e-07, "loss": 2.7013, "step": 11776 }, { "epoch": 0.9656571867794005, "grad_norm": 0.30300936102867126, "learning_rate": 3.238373676815898e-07, "loss": 2.6666, "step": 11778 }, { "epoch": 0.9658211632077889, "grad_norm": 0.30220314860343933, "learning_rate": 3.207638395527068e-07, "loss": 2.7005, "step": 11780 }, { "epoch": 0.9659851396361773, "grad_norm": 0.3112805187702179, "learning_rate": 3.177049194864501e-07, "loss": 2.6652, "step": 11782 }, { "epoch": 0.9661491160645658, "grad_norm": 0.31147560477256775, "learning_rate": 3.146606083822834e-07, "loss": 2.6182, "step": 11784 }, { "epoch": 0.9663130924929542, "grad_norm": 0.3088638484477997, "learning_rate": 3.11630907135374e-07, "loss": 2.6232, "step": 11786 }, { "epoch": 0.9664770689213426, "grad_norm": 0.3055971562862396, "learning_rate": 3.086158166366093e-07, "loss": 2.6815, "step": 11788 }, { "epoch": 0.9666410453497309, "grad_norm": 0.3195469081401825, "learning_rate": 3.056153377725579e-07, "loss": 2.6886, "step": 11790 }, { "epoch": 0.9668050217781194, "grad_norm": 0.31553179025650024, "learning_rate": 3.026294714255085e-07, "loss": 2.6428, "step": 11792 }, { "epoch": 0.9669689982065078, "grad_norm": 0.31093811988830566, "learning_rate": 2.9965821847344755e-07, "loss": 2.6553, "step": 11794 }, { "epoch": 0.9671329746348962, "grad_norm": 0.30851665139198303, "learning_rate": 2.9670157979007074e-07, "loss": 2.6777, "step": 11796 }, { "epoch": 0.9672969510632846, "grad_norm": 0.3094952702522278, "learning_rate": 2.9375955624476037e-07, "loss": 2.6642, "step": 11798 }, { "epoch": 0.9674609274916731, "grad_norm": 0.3041982352733612, "learning_rate": 2.9083214870262444e-07, "loss": 2.6249, "step": 11800 }, { "epoch": 0.9676249039200615, "grad_norm": 0.30935415625572205, "learning_rate": 2.879193580244466e-07, "loss": 2.6827, "step": 11802 }, { "epoch": 0.9677888803484499, "grad_norm": 0.3120664656162262, "learning_rate": 2.8502118506673614e-07, "loss": 2.6725, "step": 11804 }, { "epoch": 0.9679528567768383, "grad_norm": 0.31115785241127014, "learning_rate": 2.8213763068168364e-07, "loss": 2.6658, "step": 11806 }, { "epoch": 0.9681168332052268, "grad_norm": 0.31378933787345886, "learning_rate": 2.7926869571720526e-07, "loss": 2.6349, "step": 11808 }, { "epoch": 0.9682808096336152, "grad_norm": 0.3109830617904663, "learning_rate": 2.764143810168929e-07, "loss": 2.6607, "step": 11810 }, { "epoch": 0.9684447860620036, "grad_norm": 0.30570733547210693, "learning_rate": 2.735746874200529e-07, "loss": 2.6469, "step": 11812 }, { "epoch": 0.968608762490392, "grad_norm": 0.3089127838611603, "learning_rate": 2.7074961576170067e-07, "loss": 2.6508, "step": 11814 }, { "epoch": 0.9687727389187805, "grad_norm": 0.3120940327644348, "learning_rate": 2.6793916687253287e-07, "loss": 2.6214, "step": 11816 }, { "epoch": 0.9689367153471689, "grad_norm": 0.3111661970615387, "learning_rate": 2.651433415789495e-07, "loss": 2.6793, "step": 11818 }, { "epoch": 0.9691006917755572, "grad_norm": 0.31746578216552734, "learning_rate": 2.6236214070307076e-07, "loss": 2.682, "step": 11820 }, { "epoch": 0.9692646682039456, "grad_norm": 0.313875675201416, "learning_rate": 2.59595565062698e-07, "loss": 2.6931, "step": 11822 }, { "epoch": 0.9694286446323341, "grad_norm": 0.30731552839279175, "learning_rate": 2.5684361547132497e-07, "loss": 2.6783, "step": 11824 }, { "epoch": 0.9695926210607225, "grad_norm": 0.3081950843334198, "learning_rate": 2.54106292738171e-07, "loss": 2.671, "step": 11826 }, { "epoch": 0.9697565974891109, "grad_norm": 0.31837812066078186, "learning_rate": 2.513835976681256e-07, "loss": 2.625, "step": 11828 }, { "epoch": 0.9699205739174993, "grad_norm": 0.313892662525177, "learning_rate": 2.486755310618039e-07, "loss": 2.6794, "step": 11830 }, { "epoch": 0.9700845503458878, "grad_norm": 0.3089045584201813, "learning_rate": 2.459820937155022e-07, "loss": 2.6825, "step": 11832 }, { "epoch": 0.9702485267742762, "grad_norm": 0.3074551820755005, "learning_rate": 2.433032864212148e-07, "loss": 2.6703, "step": 11834 }, { "epoch": 0.9704125032026646, "grad_norm": 0.30913299322128296, "learning_rate": 2.4063910996664497e-07, "loss": 2.6524, "step": 11836 }, { "epoch": 0.9705764796310531, "grad_norm": 0.31632474064826965, "learning_rate": 2.3798956513518266e-07, "loss": 2.6922, "step": 11838 }, { "epoch": 0.9707404560594415, "grad_norm": 0.3163028359413147, "learning_rate": 2.3535465270592137e-07, "loss": 2.6321, "step": 11840 }, { "epoch": 0.9709044324878299, "grad_norm": 0.305569589138031, "learning_rate": 2.327343734536469e-07, "loss": 2.6632, "step": 11842 }, { "epoch": 0.9710684089162183, "grad_norm": 0.3147644102573395, "learning_rate": 2.3012872814885956e-07, "loss": 2.6896, "step": 11844 }, { "epoch": 0.9712323853446068, "grad_norm": 0.3178633153438568, "learning_rate": 2.2753771755772979e-07, "loss": 2.65, "step": 11846 }, { "epoch": 0.9713963617729952, "grad_norm": 0.30912619829177856, "learning_rate": 2.2496134244214262e-07, "loss": 2.6904, "step": 11848 }, { "epoch": 0.9715603382013835, "grad_norm": 0.3227543830871582, "learning_rate": 2.2239960355968093e-07, "loss": 2.6756, "step": 11850 }, { "epoch": 0.9717243146297719, "grad_norm": 0.3099755346775055, "learning_rate": 2.1985250166360883e-07, "loss": 2.6328, "step": 11852 }, { "epoch": 0.9718882910581604, "grad_norm": 0.30570080876350403, "learning_rate": 2.1732003750289941e-07, "loss": 2.7132, "step": 11854 }, { "epoch": 0.9720522674865488, "grad_norm": 0.3019803464412689, "learning_rate": 2.1480221182221816e-07, "loss": 2.6593, "step": 11856 }, { "epoch": 0.9722162439149372, "grad_norm": 0.3140520751476288, "learning_rate": 2.122990253619228e-07, "loss": 2.6813, "step": 11858 }, { "epoch": 0.9723802203433256, "grad_norm": 0.31120648980140686, "learning_rate": 2.098104788580746e-07, "loss": 2.6677, "step": 11860 }, { "epoch": 0.9725441967717141, "grad_norm": 0.31363654136657715, "learning_rate": 2.0733657304242149e-07, "loss": 2.6555, "step": 11862 }, { "epoch": 0.9727081732001025, "grad_norm": 0.32099053263664246, "learning_rate": 2.048773086424094e-07, "loss": 2.6698, "step": 11864 }, { "epoch": 0.9728721496284909, "grad_norm": 0.3177392780780792, "learning_rate": 2.0243268638118208e-07, "loss": 2.6315, "step": 11866 }, { "epoch": 0.9730361260568793, "grad_norm": 0.3135698437690735, "learning_rate": 2.0000270697757561e-07, "loss": 2.7135, "step": 11868 }, { "epoch": 0.9732001024852678, "grad_norm": 0.31334003806114197, "learning_rate": 1.9758737114611292e-07, "loss": 2.7068, "step": 11870 }, { "epoch": 0.9733640789136562, "grad_norm": 0.31074246764183044, "learning_rate": 1.951866795970203e-07, "loss": 2.6993, "step": 11872 }, { "epoch": 0.9735280553420446, "grad_norm": 0.30180931091308594, "learning_rate": 1.92800633036222e-07, "loss": 2.6554, "step": 11874 }, { "epoch": 0.973692031770433, "grad_norm": 0.3147827982902527, "learning_rate": 1.9042923216532337e-07, "loss": 2.6842, "step": 11876 }, { "epoch": 0.9738560081988215, "grad_norm": 0.309109091758728, "learning_rate": 1.8807247768163338e-07, "loss": 2.6201, "step": 11878 }, { "epoch": 0.9740199846272098, "grad_norm": 0.3139691650867462, "learning_rate": 1.8573037027814765e-07, "loss": 2.639, "step": 11880 }, { "epoch": 0.9741839610555982, "grad_norm": 0.3100583255290985, "learning_rate": 1.8340291064354864e-07, "loss": 2.6293, "step": 11882 }, { "epoch": 0.9743479374839866, "grad_norm": 0.3139925003051758, "learning_rate": 1.8109009946223888e-07, "loss": 2.6792, "step": 11884 }, { "epoch": 0.9745119139123751, "grad_norm": 0.31702014803886414, "learning_rate": 1.787919374142799e-07, "loss": 2.6501, "step": 11886 }, { "epoch": 0.9746758903407635, "grad_norm": 0.3096160590648651, "learning_rate": 1.7650842517544785e-07, "loss": 2.6659, "step": 11888 }, { "epoch": 0.9748398667691519, "grad_norm": 0.3171279728412628, "learning_rate": 1.742395634171945e-07, "loss": 2.7116, "step": 11890 }, { "epoch": 0.9750038431975403, "grad_norm": 0.3054068386554718, "learning_rate": 1.719853528066917e-07, "loss": 2.6019, "step": 11892 }, { "epoch": 0.9751678196259288, "grad_norm": 0.3155996799468994, "learning_rate": 1.6974579400677037e-07, "loss": 2.6418, "step": 11894 }, { "epoch": 0.9753317960543172, "grad_norm": 0.30555158853530884, "learning_rate": 1.675208876759704e-07, "loss": 2.674, "step": 11896 }, { "epoch": 0.9754957724827056, "grad_norm": 0.31385093927383423, "learning_rate": 1.6531063446851847e-07, "loss": 2.6767, "step": 11898 }, { "epoch": 0.9756597489110941, "grad_norm": 0.3114364445209503, "learning_rate": 1.6311503503434465e-07, "loss": 2.6479, "step": 11900 }, { "epoch": 0.9758237253394825, "grad_norm": 0.32317519187927246, "learning_rate": 1.609340900190437e-07, "loss": 2.6945, "step": 11902 }, { "epoch": 0.9759877017678709, "grad_norm": 0.3169836103916168, "learning_rate": 1.5876780006393034e-07, "loss": 2.6587, "step": 11904 }, { "epoch": 0.9761516781962593, "grad_norm": 0.3193982243537903, "learning_rate": 1.5661616580599504e-07, "loss": 2.6659, "step": 11906 }, { "epoch": 0.9763156546246478, "grad_norm": 0.3083401620388031, "learning_rate": 1.5447918787791503e-07, "loss": 2.6546, "step": 11908 }, { "epoch": 0.9764796310530361, "grad_norm": 0.31181463599205017, "learning_rate": 1.523568669080655e-07, "loss": 2.6676, "step": 11910 }, { "epoch": 0.9766436074814245, "grad_norm": 0.3096238672733307, "learning_rate": 1.5024920352051385e-07, "loss": 2.6274, "step": 11912 }, { "epoch": 0.9768075839098129, "grad_norm": 0.3070983290672302, "learning_rate": 1.4815619833500883e-07, "loss": 2.6654, "step": 11914 }, { "epoch": 0.9769715603382014, "grad_norm": 0.31458717584609985, "learning_rate": 1.4607785196699696e-07, "loss": 2.654, "step": 11916 }, { "epoch": 0.9771355367665898, "grad_norm": 0.3136763870716095, "learning_rate": 1.4401416502761166e-07, "loss": 2.7035, "step": 11918 }, { "epoch": 0.9772995131949782, "grad_norm": 0.30494287610054016, "learning_rate": 1.4196513812367307e-07, "loss": 2.7104, "step": 11920 }, { "epoch": 0.9774634896233666, "grad_norm": 0.31988272070884705, "learning_rate": 1.3993077185769365e-07, "loss": 2.6972, "step": 11922 }, { "epoch": 0.9776274660517551, "grad_norm": 0.311460018157959, "learning_rate": 1.379110668278727e-07, "loss": 2.733, "step": 11924 }, { "epoch": 0.9777914424801435, "grad_norm": 0.3143407702445984, "learning_rate": 1.3590602362810177e-07, "loss": 2.7131, "step": 11926 }, { "epoch": 0.9779554189085319, "grad_norm": 0.31846028566360474, "learning_rate": 1.3391564284795377e-07, "loss": 2.6579, "step": 11928 }, { "epoch": 0.9781193953369203, "grad_norm": 0.3061722218990326, "learning_rate": 1.319399250727049e-07, "loss": 2.7119, "step": 11930 }, { "epoch": 0.9782833717653088, "grad_norm": 0.30955928564071655, "learning_rate": 1.2997887088330163e-07, "loss": 2.6976, "step": 11932 }, { "epoch": 0.9784473481936972, "grad_norm": 0.3082471191883087, "learning_rate": 1.2803248085639373e-07, "loss": 2.6601, "step": 11934 }, { "epoch": 0.9786113246220856, "grad_norm": 0.318608820438385, "learning_rate": 1.2610075556431234e-07, "loss": 2.6876, "step": 11936 }, { "epoch": 0.978775301050474, "grad_norm": 0.30733296275138855, "learning_rate": 1.2418369557506414e-07, "loss": 2.6874, "step": 11938 }, { "epoch": 0.9789392774788624, "grad_norm": 0.31166955828666687, "learning_rate": 1.2228130145237604e-07, "loss": 2.6678, "step": 11940 }, { "epoch": 0.9791032539072508, "grad_norm": 0.3202204704284668, "learning_rate": 1.2039357375562832e-07, "loss": 2.7023, "step": 11942 }, { "epoch": 0.9792672303356392, "grad_norm": 0.3054836690425873, "learning_rate": 1.185205130399103e-07, "loss": 2.6493, "step": 11944 }, { "epoch": 0.9794312067640276, "grad_norm": 0.3023377060890198, "learning_rate": 1.1666211985598696e-07, "loss": 2.6353, "step": 11946 }, { "epoch": 0.9795951831924161, "grad_norm": 0.314871221780777, "learning_rate": 1.1481839475031009e-07, "loss": 2.6763, "step": 11948 }, { "epoch": 0.9797591596208045, "grad_norm": 0.3187420964241028, "learning_rate": 1.1298933826503488e-07, "loss": 2.6568, "step": 11950 }, { "epoch": 0.9799231360491929, "grad_norm": 0.31057143211364746, "learning_rate": 1.1117495093798114e-07, "loss": 2.6494, "step": 11952 }, { "epoch": 0.9800871124775814, "grad_norm": 0.3144533932209015, "learning_rate": 1.0937523330266652e-07, "loss": 2.6203, "step": 11954 }, { "epoch": 0.9802510889059698, "grad_norm": 0.31885576248168945, "learning_rate": 1.075901858882955e-07, "loss": 2.693, "step": 11956 }, { "epoch": 0.9804150653343582, "grad_norm": 0.3044224977493286, "learning_rate": 1.0581980921976487e-07, "loss": 2.6282, "step": 11958 }, { "epoch": 0.9805790417627466, "grad_norm": 0.3081304430961609, "learning_rate": 1.0406410381763598e-07, "loss": 2.685, "step": 11960 }, { "epoch": 0.9807430181911351, "grad_norm": 0.3102897107601166, "learning_rate": 1.0232307019817367e-07, "loss": 2.6399, "step": 11962 }, { "epoch": 0.9809069946195235, "grad_norm": 0.3002000153064728, "learning_rate": 1.0059670887333505e-07, "loss": 2.6555, "step": 11964 }, { "epoch": 0.9810709710479119, "grad_norm": 0.30638667941093445, "learning_rate": 9.888502035073633e-08, "loss": 2.6572, "step": 11966 }, { "epoch": 0.9812349474763002, "grad_norm": 0.3075307309627533, "learning_rate": 9.718800513370818e-08, "loss": 2.6371, "step": 11968 }, { "epoch": 0.9813989239046887, "grad_norm": 0.31834232807159424, "learning_rate": 9.550566372125147e-08, "loss": 2.6555, "step": 11970 }, { "epoch": 0.9815629003330771, "grad_norm": 0.30776700377464294, "learning_rate": 9.383799660804826e-08, "loss": 2.6876, "step": 11972 }, { "epoch": 0.9817268767614655, "grad_norm": 0.31957370042800903, "learning_rate": 9.218500428447852e-08, "loss": 2.6432, "step": 11974 }, { "epoch": 0.9818908531898539, "grad_norm": 0.3186641335487366, "learning_rate": 9.054668723659787e-08, "loss": 2.6376, "step": 11976 }, { "epoch": 0.9820548296182424, "grad_norm": 0.30735456943511963, "learning_rate": 8.89230459461543e-08, "loss": 2.6425, "step": 11978 }, { "epoch": 0.9822188060466308, "grad_norm": 0.304141104221344, "learning_rate": 8.731408089056592e-08, "loss": 2.6425, "step": 11980 }, { "epoch": 0.9823827824750192, "grad_norm": 0.31086212396621704, "learning_rate": 8.571979254295426e-08, "loss": 2.6584, "step": 11982 }, { "epoch": 0.9825467589034076, "grad_norm": 0.3016188442707062, "learning_rate": 8.414018137211655e-08, "loss": 2.6373, "step": 11984 }, { "epoch": 0.9827107353317961, "grad_norm": 0.3063051104545593, "learning_rate": 8.257524784252568e-08, "loss": 2.6744, "step": 11986 }, { "epoch": 0.9828747117601845, "grad_norm": 0.309219092130661, "learning_rate": 8.102499241435246e-08, "loss": 2.6323, "step": 11988 }, { "epoch": 0.9830386881885729, "grad_norm": 0.31853505969047546, "learning_rate": 7.94894155434489e-08, "loss": 2.6785, "step": 11990 }, { "epoch": 0.9832026646169613, "grad_norm": 0.31302815675735474, "learning_rate": 7.796851768133717e-08, "loss": 2.6343, "step": 11992 }, { "epoch": 0.9833666410453498, "grad_norm": 0.3105024993419647, "learning_rate": 7.646229927524284e-08, "loss": 2.6575, "step": 11994 }, { "epoch": 0.9835306174737382, "grad_norm": 0.3272876739501953, "learning_rate": 7.497076076806164e-08, "loss": 2.721, "step": 11996 }, { "epoch": 0.9836945939021265, "grad_norm": 0.3098005950450897, "learning_rate": 7.349390259838162e-08, "loss": 2.7274, "step": 11998 }, { "epoch": 0.9838585703305149, "grad_norm": 0.3111310601234436, "learning_rate": 7.203172520046652e-08, "loss": 2.6206, "step": 12000 } ], "logging_steps": 2, "max_steps": 12197, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0132651411243008e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }