{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7378939277478862, "eval_steps": 500, "global_step": 9000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.198821419420958e-05, "grad_norm": 3.1852145195007324, "learning_rate": 0.0, "loss": 12.119, "step": 1 }, { "epoch": 0.00016397642838841917, "grad_norm": 3.045940399169922, "learning_rate": 1.639344262295082e-07, "loss": 12.1143, "step": 2 }, { "epoch": 0.00032795285677683834, "grad_norm": 3.0180699825286865, "learning_rate": 4.918032786885246e-07, "loss": 12.1146, "step": 4 }, { "epoch": 0.0004919292851652575, "grad_norm": 2.994316816329956, "learning_rate": 8.19672131147541e-07, "loss": 12.1185, "step": 6 }, { "epoch": 0.0006559057135536767, "grad_norm": 3.1152169704437256, "learning_rate": 1.1475409836065575e-06, "loss": 12.1106, "step": 8 }, { "epoch": 0.0008198821419420958, "grad_norm": 3.100684881210327, "learning_rate": 1.4754098360655739e-06, "loss": 12.1086, "step": 10 }, { "epoch": 0.000983858570330515, "grad_norm": 2.950772762298584, "learning_rate": 1.8032786885245903e-06, "loss": 12.102, "step": 12 }, { "epoch": 0.001147834998718934, "grad_norm": 2.897174835205078, "learning_rate": 2.1311475409836067e-06, "loss": 12.0903, "step": 14 }, { "epoch": 0.0013118114271073534, "grad_norm": 3.0111382007598877, "learning_rate": 2.459016393442623e-06, "loss": 12.0613, "step": 16 }, { "epoch": 0.0014757878554957724, "grad_norm": 3.11842942237854, "learning_rate": 2.7868852459016396e-06, "loss": 12.0268, "step": 18 }, { "epoch": 0.0016397642838841917, "grad_norm": 3.0497119426727295, "learning_rate": 3.114754098360656e-06, "loss": 12.0057, "step": 20 }, { "epoch": 0.0018037407122726108, "grad_norm": 3.685875177383423, "learning_rate": 3.4426229508196724e-06, "loss": 11.905, "step": 22 }, { "epoch": 0.00196771714066103, "grad_norm": 3.960944175720215, "learning_rate": 3.770491803278689e-06, "loss": 11.8163, "step": 24 }, { "epoch": 0.002131693569049449, "grad_norm": 4.291749000549316, "learning_rate": 4.098360655737704e-06, "loss": 11.7568, "step": 26 }, { "epoch": 0.002295669997437868, "grad_norm": 4.350489616394043, "learning_rate": 4.426229508196722e-06, "loss": 11.6989, "step": 28 }, { "epoch": 0.0024596464258262877, "grad_norm": 4.357988357543945, "learning_rate": 4.754098360655738e-06, "loss": 11.4718, "step": 30 }, { "epoch": 0.0026236228542147067, "grad_norm": 3.87373423576355, "learning_rate": 5.0819672131147545e-06, "loss": 11.2832, "step": 32 }, { "epoch": 0.0027875992826031258, "grad_norm": 3.546539306640625, "learning_rate": 5.409836065573771e-06, "loss": 11.1534, "step": 34 }, { "epoch": 0.002951575710991545, "grad_norm": 3.165611743927002, "learning_rate": 5.737704918032787e-06, "loss": 11.0611, "step": 36 }, { "epoch": 0.0031155521393799643, "grad_norm": 2.898167848587036, "learning_rate": 6.065573770491804e-06, "loss": 10.9593, "step": 38 }, { "epoch": 0.0032795285677683834, "grad_norm": 2.756183385848999, "learning_rate": 6.393442622950819e-06, "loss": 10.8399, "step": 40 }, { "epoch": 0.0034435049961568025, "grad_norm": 2.5953481197357178, "learning_rate": 6.721311475409836e-06, "loss": 10.7619, "step": 42 }, { "epoch": 0.0036074814245452215, "grad_norm": 2.4934237003326416, "learning_rate": 7.049180327868852e-06, "loss": 10.6836, "step": 44 }, { "epoch": 0.0037714578529336406, "grad_norm": 2.4235870838165283, "learning_rate": 7.3770491803278695e-06, "loss": 10.6374, "step": 46 }, { "epoch": 0.00393543428132206, "grad_norm": 2.4326910972595215, "learning_rate": 7.704918032786886e-06, "loss": 10.5798, "step": 48 }, { "epoch": 0.004099410709710479, "grad_norm": 2.3508763313293457, "learning_rate": 8.032786885245902e-06, "loss": 10.5475, "step": 50 }, { "epoch": 0.004263387138098898, "grad_norm": 2.411895513534546, "learning_rate": 8.360655737704919e-06, "loss": 10.4924, "step": 52 }, { "epoch": 0.004427363566487317, "grad_norm": 2.3643436431884766, "learning_rate": 8.688524590163935e-06, "loss": 10.4639, "step": 54 }, { "epoch": 0.004591339994875736, "grad_norm": 2.3757126331329346, "learning_rate": 9.016393442622952e-06, "loss": 10.4338, "step": 56 }, { "epoch": 0.004755316423264155, "grad_norm": 2.3393900394439697, "learning_rate": 9.344262295081968e-06, "loss": 10.3862, "step": 58 }, { "epoch": 0.004919292851652575, "grad_norm": 2.3030688762664795, "learning_rate": 9.672131147540984e-06, "loss": 10.3828, "step": 60 }, { "epoch": 0.005083269280040994, "grad_norm": 2.290266275405884, "learning_rate": 1e-05, "loss": 10.3545, "step": 62 }, { "epoch": 0.005247245708429413, "grad_norm": 2.3040578365325928, "learning_rate": 1.0327868852459017e-05, "loss": 10.3075, "step": 64 }, { "epoch": 0.0054112221368178325, "grad_norm": 2.2848293781280518, "learning_rate": 1.0655737704918032e-05, "loss": 10.2883, "step": 66 }, { "epoch": 0.0055751985652062516, "grad_norm": 2.261911630630493, "learning_rate": 1.0983606557377048e-05, "loss": 10.2328, "step": 68 }, { "epoch": 0.005739174993594671, "grad_norm": 2.2474453449249268, "learning_rate": 1.1311475409836065e-05, "loss": 10.2065, "step": 70 }, { "epoch": 0.00590315142198309, "grad_norm": 2.2612874507904053, "learning_rate": 1.1639344262295083e-05, "loss": 10.1581, "step": 72 }, { "epoch": 0.006067127850371509, "grad_norm": 2.293903112411499, "learning_rate": 1.19672131147541e-05, "loss": 10.0998, "step": 74 }, { "epoch": 0.006231104278759929, "grad_norm": 2.2207071781158447, "learning_rate": 1.2295081967213116e-05, "loss": 10.087, "step": 76 }, { "epoch": 0.006395080707148348, "grad_norm": 2.2724802494049072, "learning_rate": 1.2622950819672132e-05, "loss": 10.006, "step": 78 }, { "epoch": 0.006559057135536767, "grad_norm": 2.18399977684021, "learning_rate": 1.2950819672131149e-05, "loss": 9.9685, "step": 80 }, { "epoch": 0.006723033563925186, "grad_norm": 2.2227847576141357, "learning_rate": 1.3278688524590163e-05, "loss": 9.8784, "step": 82 }, { "epoch": 0.006887009992313605, "grad_norm": 2.275341033935547, "learning_rate": 1.3606557377049181e-05, "loss": 9.7873, "step": 84 }, { "epoch": 0.007050986420702024, "grad_norm": 2.170790195465088, "learning_rate": 1.3934426229508196e-05, "loss": 9.758, "step": 86 }, { "epoch": 0.007214962849090443, "grad_norm": 2.3812711238861084, "learning_rate": 1.4262295081967214e-05, "loss": 9.6872, "step": 88 }, { "epoch": 0.007378939277478862, "grad_norm": 2.2956268787384033, "learning_rate": 1.4590163934426229e-05, "loss": 9.6342, "step": 90 }, { "epoch": 0.007542915705867281, "grad_norm": 4.3424859046936035, "learning_rate": 1.4918032786885247e-05, "loss": 9.599, "step": 92 }, { "epoch": 0.007706892134255701, "grad_norm": 2.303790807723999, "learning_rate": 1.5245901639344262e-05, "loss": 9.4832, "step": 94 }, { "epoch": 0.00787086856264412, "grad_norm": 2.4979960918426514, "learning_rate": 1.557377049180328e-05, "loss": 9.4503, "step": 96 }, { "epoch": 0.008034844991032538, "grad_norm": 1.9802457094192505, "learning_rate": 1.5901639344262295e-05, "loss": 9.3706, "step": 98 }, { "epoch": 0.008198821419420958, "grad_norm": 2.263692855834961, "learning_rate": 1.6229508196721314e-05, "loss": 9.3359, "step": 100 }, { "epoch": 0.008362797847809378, "grad_norm": 2.014167547225952, "learning_rate": 1.6557377049180328e-05, "loss": 9.2529, "step": 102 }, { "epoch": 0.008526774276197796, "grad_norm": 2.3031680583953857, "learning_rate": 1.6885245901639347e-05, "loss": 9.1999, "step": 104 }, { "epoch": 0.008690750704586216, "grad_norm": 1.8865060806274414, "learning_rate": 1.721311475409836e-05, "loss": 9.2385, "step": 106 }, { "epoch": 0.008854727132974635, "grad_norm": 1.783657431602478, "learning_rate": 1.754098360655738e-05, "loss": 9.1613, "step": 108 }, { "epoch": 0.009018703561363054, "grad_norm": 1.7754466533660889, "learning_rate": 1.7868852459016393e-05, "loss": 9.0875, "step": 110 }, { "epoch": 0.009182679989751473, "grad_norm": 2.1157960891723633, "learning_rate": 1.8196721311475413e-05, "loss": 9.0315, "step": 112 }, { "epoch": 0.009346656418139893, "grad_norm": 2.1570520401000977, "learning_rate": 1.8524590163934426e-05, "loss": 8.988, "step": 114 }, { "epoch": 0.00951063284652831, "grad_norm": 2.070383071899414, "learning_rate": 1.8852459016393442e-05, "loss": 8.9084, "step": 116 }, { "epoch": 0.00967460927491673, "grad_norm": 2.004547119140625, "learning_rate": 1.918032786885246e-05, "loss": 8.8827, "step": 118 }, { "epoch": 0.00983858570330515, "grad_norm": 2.282608985900879, "learning_rate": 1.9508196721311475e-05, "loss": 8.8536, "step": 120 }, { "epoch": 0.010002562131693569, "grad_norm": 1.872442364692688, "learning_rate": 1.9836065573770492e-05, "loss": 8.8046, "step": 122 }, { "epoch": 0.010166538560081989, "grad_norm": 2.8452837467193604, "learning_rate": 2.0163934426229508e-05, "loss": 8.7791, "step": 124 }, { "epoch": 0.010330514988470407, "grad_norm": 2.155548095703125, "learning_rate": 2.0491803278688525e-05, "loss": 8.7161, "step": 126 }, { "epoch": 0.010494491416858827, "grad_norm": 2.887465238571167, "learning_rate": 2.081967213114754e-05, "loss": 8.646, "step": 128 }, { "epoch": 0.010658467845247245, "grad_norm": 1.7281243801116943, "learning_rate": 2.114754098360656e-05, "loss": 8.6342, "step": 130 }, { "epoch": 0.010822444273635665, "grad_norm": 2.309556484222412, "learning_rate": 2.1475409836065574e-05, "loss": 8.6055, "step": 132 }, { "epoch": 0.010986420702024083, "grad_norm": 2.6733663082122803, "learning_rate": 2.1803278688524594e-05, "loss": 8.5734, "step": 134 }, { "epoch": 0.011150397130412503, "grad_norm": 1.8595812320709229, "learning_rate": 2.2131147540983607e-05, "loss": 8.5053, "step": 136 }, { "epoch": 0.011314373558800923, "grad_norm": 2.128081798553467, "learning_rate": 2.2459016393442626e-05, "loss": 8.5065, "step": 138 }, { "epoch": 0.011478349987189341, "grad_norm": 2.7606353759765625, "learning_rate": 2.278688524590164e-05, "loss": 8.4438, "step": 140 }, { "epoch": 0.011642326415577761, "grad_norm": 4.279053688049316, "learning_rate": 2.311475409836066e-05, "loss": 8.3871, "step": 142 }, { "epoch": 0.01180630284396618, "grad_norm": 2.8464019298553467, "learning_rate": 2.3442622950819672e-05, "loss": 8.3569, "step": 144 }, { "epoch": 0.0119702792723546, "grad_norm": 1.880401611328125, "learning_rate": 2.377049180327869e-05, "loss": 8.3394, "step": 146 }, { "epoch": 0.012134255700743017, "grad_norm": 1.8325446844100952, "learning_rate": 2.4098360655737705e-05, "loss": 8.2505, "step": 148 }, { "epoch": 0.012298232129131437, "grad_norm": 2.5180068016052246, "learning_rate": 2.442622950819672e-05, "loss": 8.2408, "step": 150 }, { "epoch": 0.012462208557519857, "grad_norm": 1.706740379333496, "learning_rate": 2.4754098360655738e-05, "loss": 8.2324, "step": 152 }, { "epoch": 0.012626184985908276, "grad_norm": 1.7287302017211914, "learning_rate": 2.5081967213114754e-05, "loss": 8.2029, "step": 154 }, { "epoch": 0.012790161414296695, "grad_norm": 2.2318031787872314, "learning_rate": 2.540983606557377e-05, "loss": 8.1472, "step": 156 }, { "epoch": 0.012954137842685114, "grad_norm": 2.371234655380249, "learning_rate": 2.573770491803279e-05, "loss": 8.0911, "step": 158 }, { "epoch": 0.013118114271073534, "grad_norm": 2.0684690475463867, "learning_rate": 2.6065573770491804e-05, "loss": 8.0566, "step": 160 }, { "epoch": 0.013282090699461952, "grad_norm": 2.811659097671509, "learning_rate": 2.639344262295082e-05, "loss": 8.0289, "step": 162 }, { "epoch": 0.013446067127850372, "grad_norm": 1.9651939868927002, "learning_rate": 2.6721311475409837e-05, "loss": 7.9698, "step": 164 }, { "epoch": 0.01361004355623879, "grad_norm": 2.164975166320801, "learning_rate": 2.7049180327868856e-05, "loss": 7.9336, "step": 166 }, { "epoch": 0.01377401998462721, "grad_norm": 1.353088140487671, "learning_rate": 2.737704918032787e-05, "loss": 7.9496, "step": 168 }, { "epoch": 0.01393799641301563, "grad_norm": 1.452012538909912, "learning_rate": 2.7704918032786886e-05, "loss": 7.8609, "step": 170 }, { "epoch": 0.014101972841404048, "grad_norm": 2.3023464679718018, "learning_rate": 2.8032786885245906e-05, "loss": 7.8505, "step": 172 }, { "epoch": 0.014265949269792468, "grad_norm": 4.203457832336426, "learning_rate": 2.8360655737704922e-05, "loss": 7.8213, "step": 174 }, { "epoch": 0.014429925698180886, "grad_norm": 3.246518135070801, "learning_rate": 2.8688524590163935e-05, "loss": 7.7589, "step": 176 }, { "epoch": 0.014593902126569306, "grad_norm": 2.139510154724121, "learning_rate": 2.901639344262295e-05, "loss": 7.8026, "step": 178 }, { "epoch": 0.014757878554957724, "grad_norm": 1.7982274293899536, "learning_rate": 2.934426229508197e-05, "loss": 7.7061, "step": 180 }, { "epoch": 0.014921854983346144, "grad_norm": 2.1394877433776855, "learning_rate": 2.967213114754098e-05, "loss": 7.6849, "step": 182 }, { "epoch": 0.015085831411734562, "grad_norm": 1.9051532745361328, "learning_rate": 3e-05, "loss": 7.6823, "step": 184 }, { "epoch": 0.015249807840122982, "grad_norm": 2.402742862701416, "learning_rate": 3.0327868852459017e-05, "loss": 7.6698, "step": 186 }, { "epoch": 0.015413784268511402, "grad_norm": 2.6151533126831055, "learning_rate": 3.065573770491804e-05, "loss": 7.5872, "step": 188 }, { "epoch": 0.01557776069689982, "grad_norm": 1.7072643041610718, "learning_rate": 3.098360655737705e-05, "loss": 7.5527, "step": 190 }, { "epoch": 0.01574173712528824, "grad_norm": 1.5905566215515137, "learning_rate": 3.131147540983606e-05, "loss": 7.5412, "step": 192 }, { "epoch": 0.01590571355367666, "grad_norm": 2.3824453353881836, "learning_rate": 3.163934426229508e-05, "loss": 7.5453, "step": 194 }, { "epoch": 0.016069689982065077, "grad_norm": 1.4948384761810303, "learning_rate": 3.19672131147541e-05, "loss": 7.4671, "step": 196 }, { "epoch": 0.0162336664104535, "grad_norm": 2.2609403133392334, "learning_rate": 3.2295081967213116e-05, "loss": 7.4387, "step": 198 }, { "epoch": 0.016397642838841917, "grad_norm": 1.8224328756332397, "learning_rate": 3.2622950819672136e-05, "loss": 7.3725, "step": 200 }, { "epoch": 0.016561619267230335, "grad_norm": 1.9965217113494873, "learning_rate": 3.295081967213115e-05, "loss": 7.3654, "step": 202 }, { "epoch": 0.016725595695618756, "grad_norm": 1.1850310564041138, "learning_rate": 3.327868852459017e-05, "loss": 7.3385, "step": 204 }, { "epoch": 0.016889572124007175, "grad_norm": 1.642038106918335, "learning_rate": 3.360655737704918e-05, "loss": 7.3, "step": 206 }, { "epoch": 0.017053548552395593, "grad_norm": 1.9536902904510498, "learning_rate": 3.39344262295082e-05, "loss": 7.273, "step": 208 }, { "epoch": 0.01721752498078401, "grad_norm": 1.829714059829712, "learning_rate": 3.4262295081967214e-05, "loss": 7.2287, "step": 210 }, { "epoch": 0.017381501409172433, "grad_norm": 2.4997904300689697, "learning_rate": 3.459016393442623e-05, "loss": 7.2496, "step": 212 }, { "epoch": 0.01754547783756085, "grad_norm": 3.472687244415283, "learning_rate": 3.491803278688525e-05, "loss": 7.1957, "step": 214 }, { "epoch": 0.01770945426594927, "grad_norm": 3.043635845184326, "learning_rate": 3.524590163934427e-05, "loss": 7.1526, "step": 216 }, { "epoch": 0.01787343069433769, "grad_norm": 3.5498316287994385, "learning_rate": 3.557377049180328e-05, "loss": 7.1433, "step": 218 }, { "epoch": 0.01803740712272611, "grad_norm": 2.9172403812408447, "learning_rate": 3.590163934426229e-05, "loss": 7.1423, "step": 220 }, { "epoch": 0.018201383551114527, "grad_norm": 2.57663893699646, "learning_rate": 3.622950819672131e-05, "loss": 7.0919, "step": 222 }, { "epoch": 0.018365359979502945, "grad_norm": 1.6703250408172607, "learning_rate": 3.655737704918033e-05, "loss": 7.061, "step": 224 }, { "epoch": 0.018529336407891367, "grad_norm": 1.2947953939437866, "learning_rate": 3.6885245901639346e-05, "loss": 7.0452, "step": 226 }, { "epoch": 0.018693312836279785, "grad_norm": 1.5165050029754639, "learning_rate": 3.721311475409836e-05, "loss": 7.0161, "step": 228 }, { "epoch": 0.018857289264668203, "grad_norm": 2.0093023777008057, "learning_rate": 3.754098360655738e-05, "loss": 7.0072, "step": 230 }, { "epoch": 0.01902126569305662, "grad_norm": 2.3759045600891113, "learning_rate": 3.78688524590164e-05, "loss": 6.9464, "step": 232 }, { "epoch": 0.019185242121445043, "grad_norm": 2.2470510005950928, "learning_rate": 3.819672131147541e-05, "loss": 6.9827, "step": 234 }, { "epoch": 0.01934921854983346, "grad_norm": 1.9213268756866455, "learning_rate": 3.8524590163934424e-05, "loss": 6.938, "step": 236 }, { "epoch": 0.01951319497822188, "grad_norm": 1.64090895652771, "learning_rate": 3.8852459016393444e-05, "loss": 6.8792, "step": 238 }, { "epoch": 0.0196771714066103, "grad_norm": 1.7019102573394775, "learning_rate": 3.9180327868852464e-05, "loss": 6.9123, "step": 240 }, { "epoch": 0.01984114783499872, "grad_norm": 1.2651671171188354, "learning_rate": 3.950819672131148e-05, "loss": 6.8878, "step": 242 }, { "epoch": 0.020005124263387138, "grad_norm": 1.6623637676239014, "learning_rate": 3.983606557377049e-05, "loss": 6.8734, "step": 244 }, { "epoch": 0.020169100691775556, "grad_norm": 2.097914695739746, "learning_rate": 4.016393442622951e-05, "loss": 6.8284, "step": 246 }, { "epoch": 0.020333077120163977, "grad_norm": 1.9533648490905762, "learning_rate": 4.049180327868853e-05, "loss": 6.8234, "step": 248 }, { "epoch": 0.020497053548552396, "grad_norm": 3.1760356426239014, "learning_rate": 4.081967213114754e-05, "loss": 6.8269, "step": 250 }, { "epoch": 0.020661029976940814, "grad_norm": 3.1443092823028564, "learning_rate": 4.1147540983606556e-05, "loss": 6.8064, "step": 252 }, { "epoch": 0.020825006405329236, "grad_norm": 2.328242301940918, "learning_rate": 4.1475409836065575e-05, "loss": 6.7787, "step": 254 }, { "epoch": 0.020988982833717654, "grad_norm": 1.6525546312332153, "learning_rate": 4.1803278688524595e-05, "loss": 6.7703, "step": 256 }, { "epoch": 0.021152959262106072, "grad_norm": 1.2344805002212524, "learning_rate": 4.213114754098361e-05, "loss": 6.7074, "step": 258 }, { "epoch": 0.02131693569049449, "grad_norm": 1.625827670097351, "learning_rate": 4.245901639344262e-05, "loss": 6.7281, "step": 260 }, { "epoch": 0.021480912118882912, "grad_norm": 1.2819187641143799, "learning_rate": 4.278688524590164e-05, "loss": 6.7493, "step": 262 }, { "epoch": 0.02164488854727133, "grad_norm": 1.8072625398635864, "learning_rate": 4.311475409836066e-05, "loss": 6.6968, "step": 264 }, { "epoch": 0.021808864975659748, "grad_norm": 1.3799159526824951, "learning_rate": 4.3442622950819674e-05, "loss": 6.688, "step": 266 }, { "epoch": 0.021972841404048166, "grad_norm": 2.2259645462036133, "learning_rate": 4.377049180327869e-05, "loss": 6.6849, "step": 268 }, { "epoch": 0.022136817832436588, "grad_norm": 2.3046491146087646, "learning_rate": 4.409836065573771e-05, "loss": 6.6438, "step": 270 }, { "epoch": 0.022300794260825006, "grad_norm": 2.3048136234283447, "learning_rate": 4.442622950819673e-05, "loss": 6.5669, "step": 272 }, { "epoch": 0.022464770689213424, "grad_norm": 1.427413821220398, "learning_rate": 4.475409836065574e-05, "loss": 6.6016, "step": 274 }, { "epoch": 0.022628747117601846, "grad_norm": 1.9164036512374878, "learning_rate": 4.508196721311476e-05, "loss": 6.5754, "step": 276 }, { "epoch": 0.022792723545990264, "grad_norm": 2.1851377487182617, "learning_rate": 4.540983606557377e-05, "loss": 6.5257, "step": 278 }, { "epoch": 0.022956699974378682, "grad_norm": 1.622302770614624, "learning_rate": 4.5737704918032786e-05, "loss": 6.5406, "step": 280 }, { "epoch": 0.0231206764027671, "grad_norm": 1.7634437084197998, "learning_rate": 4.6065573770491805e-05, "loss": 6.5259, "step": 282 }, { "epoch": 0.023284652831155522, "grad_norm": 2.2173545360565186, "learning_rate": 4.6393442622950825e-05, "loss": 6.5383, "step": 284 }, { "epoch": 0.02344862925954394, "grad_norm": 1.620126724243164, "learning_rate": 4.672131147540984e-05, "loss": 6.5118, "step": 286 }, { "epoch": 0.02361260568793236, "grad_norm": 2.0221054553985596, "learning_rate": 4.704918032786885e-05, "loss": 6.4726, "step": 288 }, { "epoch": 0.02377658211632078, "grad_norm": 1.2866939306259155, "learning_rate": 4.737704918032787e-05, "loss": 6.4646, "step": 290 }, { "epoch": 0.0239405585447092, "grad_norm": 1.9667149782180786, "learning_rate": 4.770491803278689e-05, "loss": 6.4897, "step": 292 }, { "epoch": 0.024104534973097617, "grad_norm": 2.3963539600372314, "learning_rate": 4.8032786885245904e-05, "loss": 6.4017, "step": 294 }, { "epoch": 0.024268511401486035, "grad_norm": 2.0867695808410645, "learning_rate": 4.836065573770492e-05, "loss": 6.4881, "step": 296 }, { "epoch": 0.024432487829874457, "grad_norm": 2.1272687911987305, "learning_rate": 4.868852459016394e-05, "loss": 6.3954, "step": 298 }, { "epoch": 0.024596464258262875, "grad_norm": 1.3982235193252563, "learning_rate": 4.9016393442622957e-05, "loss": 6.4431, "step": 300 }, { "epoch": 0.024760440686651293, "grad_norm": 2.191251039505005, "learning_rate": 4.934426229508197e-05, "loss": 6.4309, "step": 302 }, { "epoch": 0.024924417115039715, "grad_norm": 1.7500178813934326, "learning_rate": 4.967213114754098e-05, "loss": 6.3676, "step": 304 }, { "epoch": 0.025088393543428133, "grad_norm": 1.4814640283584595, "learning_rate": 5e-05, "loss": 6.4043, "step": 306 }, { "epoch": 0.02525236997181655, "grad_norm": 1.549419641494751, "learning_rate": 5.0327868852459015e-05, "loss": 6.3276, "step": 308 }, { "epoch": 0.02541634640020497, "grad_norm": 1.4538007974624634, "learning_rate": 5.0655737704918035e-05, "loss": 6.2641, "step": 310 }, { "epoch": 0.02558032282859339, "grad_norm": 1.4905295372009277, "learning_rate": 5.098360655737705e-05, "loss": 6.3035, "step": 312 }, { "epoch": 0.02574429925698181, "grad_norm": 1.6055713891983032, "learning_rate": 5.131147540983606e-05, "loss": 6.3119, "step": 314 }, { "epoch": 0.025908275685370227, "grad_norm": 1.8923226594924927, "learning_rate": 5.163934426229509e-05, "loss": 6.2762, "step": 316 }, { "epoch": 0.026072252113758645, "grad_norm": 1.6730965375900269, "learning_rate": 5.19672131147541e-05, "loss": 6.262, "step": 318 }, { "epoch": 0.026236228542147067, "grad_norm": 1.7695355415344238, "learning_rate": 5.229508196721312e-05, "loss": 6.2443, "step": 320 }, { "epoch": 0.026400204970535485, "grad_norm": 1.5890907049179077, "learning_rate": 5.2622950819672134e-05, "loss": 6.2874, "step": 322 }, { "epoch": 0.026564181398923904, "grad_norm": 1.351945161819458, "learning_rate": 5.295081967213115e-05, "loss": 6.1962, "step": 324 }, { "epoch": 0.026728157827312325, "grad_norm": 1.8272804021835327, "learning_rate": 5.327868852459017e-05, "loss": 6.2117, "step": 326 }, { "epoch": 0.026892134255700743, "grad_norm": 1.517553448677063, "learning_rate": 5.360655737704918e-05, "loss": 6.226, "step": 328 }, { "epoch": 0.02705611068408916, "grad_norm": 1.2578155994415283, "learning_rate": 5.393442622950819e-05, "loss": 6.1564, "step": 330 }, { "epoch": 0.02722008711247758, "grad_norm": 1.3466731309890747, "learning_rate": 5.426229508196722e-05, "loss": 6.2005, "step": 332 }, { "epoch": 0.027384063540866, "grad_norm": 1.8164156675338745, "learning_rate": 5.459016393442623e-05, "loss": 6.2395, "step": 334 }, { "epoch": 0.02754803996925442, "grad_norm": 1.9037235975265503, "learning_rate": 5.491803278688525e-05, "loss": 6.1536, "step": 336 }, { "epoch": 0.027712016397642838, "grad_norm": 2.0508320331573486, "learning_rate": 5.5245901639344265e-05, "loss": 6.1402, "step": 338 }, { "epoch": 0.02787599282603126, "grad_norm": 2.135850667953491, "learning_rate": 5.557377049180328e-05, "loss": 6.1144, "step": 340 }, { "epoch": 0.028039969254419678, "grad_norm": 1.4268845319747925, "learning_rate": 5.5901639344262305e-05, "loss": 6.0949, "step": 342 }, { "epoch": 0.028203945682808096, "grad_norm": 1.7478644847869873, "learning_rate": 5.622950819672132e-05, "loss": 6.0932, "step": 344 }, { "epoch": 0.028367922111196514, "grad_norm": 1.3711293935775757, "learning_rate": 5.6557377049180324e-05, "loss": 6.0383, "step": 346 }, { "epoch": 0.028531898539584936, "grad_norm": 2.102510929107666, "learning_rate": 5.688524590163935e-05, "loss": 6.151, "step": 348 }, { "epoch": 0.028695874967973354, "grad_norm": 2.1685709953308105, "learning_rate": 5.7213114754098364e-05, "loss": 6.0951, "step": 350 }, { "epoch": 0.028859851396361772, "grad_norm": 1.92462158203125, "learning_rate": 5.754098360655738e-05, "loss": 6.1199, "step": 352 }, { "epoch": 0.029023827824750194, "grad_norm": 2.6841022968292236, "learning_rate": 5.7868852459016396e-05, "loss": 6.0132, "step": 354 }, { "epoch": 0.029187804253138612, "grad_norm": 2.1786410808563232, "learning_rate": 5.819672131147541e-05, "loss": 6.0524, "step": 356 }, { "epoch": 0.02935178068152703, "grad_norm": 2.2823619842529297, "learning_rate": 5.8524590163934436e-05, "loss": 6.0581, "step": 358 }, { "epoch": 0.02951575710991545, "grad_norm": 1.539980173110962, "learning_rate": 5.885245901639345e-05, "loss": 6.0375, "step": 360 }, { "epoch": 0.02967973353830387, "grad_norm": 2.02248215675354, "learning_rate": 5.9180327868852455e-05, "loss": 6.024, "step": 362 }, { "epoch": 0.029843709966692288, "grad_norm": 2.0817408561706543, "learning_rate": 5.950819672131148e-05, "loss": 5.9895, "step": 364 }, { "epoch": 0.030007686395080706, "grad_norm": 2.433933973312378, "learning_rate": 5.9836065573770495e-05, "loss": 5.9542, "step": 366 }, { "epoch": 0.030171662823469125, "grad_norm": 1.9378852844238281, "learning_rate": 6.016393442622951e-05, "loss": 6.0731, "step": 368 }, { "epoch": 0.030335639251857546, "grad_norm": 1.5799286365509033, "learning_rate": 6.049180327868853e-05, "loss": 5.941, "step": 370 }, { "epoch": 0.030499615680245964, "grad_norm": 1.8533333539962769, "learning_rate": 6.081967213114754e-05, "loss": 5.9119, "step": 372 }, { "epoch": 0.030663592108634383, "grad_norm": 1.8558950424194336, "learning_rate": 6.114754098360656e-05, "loss": 5.9879, "step": 374 }, { "epoch": 0.030827568537022804, "grad_norm": 1.987197756767273, "learning_rate": 6.147540983606557e-05, "loss": 5.9686, "step": 376 }, { "epoch": 0.030991544965411223, "grad_norm": 2.317286491394043, "learning_rate": 6.180327868852459e-05, "loss": 5.9557, "step": 378 }, { "epoch": 0.03115552139379964, "grad_norm": 2.142669916152954, "learning_rate": 6.213114754098361e-05, "loss": 5.8988, "step": 380 }, { "epoch": 0.03131949782218806, "grad_norm": 2.495762825012207, "learning_rate": 6.245901639344263e-05, "loss": 5.9131, "step": 382 }, { "epoch": 0.03148347425057648, "grad_norm": 1.6954542398452759, "learning_rate": 6.278688524590164e-05, "loss": 5.9113, "step": 384 }, { "epoch": 0.0316474506789649, "grad_norm": 1.8803491592407227, "learning_rate": 6.311475409836067e-05, "loss": 5.976, "step": 386 }, { "epoch": 0.03181142710735332, "grad_norm": 1.8088006973266602, "learning_rate": 6.344262295081968e-05, "loss": 5.9088, "step": 388 }, { "epoch": 0.031975403535741735, "grad_norm": 1.9314664602279663, "learning_rate": 6.377049180327869e-05, "loss": 5.8998, "step": 390 }, { "epoch": 0.03213937996413015, "grad_norm": 2.3078742027282715, "learning_rate": 6.40983606557377e-05, "loss": 5.9263, "step": 392 }, { "epoch": 0.03230335639251858, "grad_norm": 2.3376059532165527, "learning_rate": 6.442622950819672e-05, "loss": 5.8472, "step": 394 }, { "epoch": 0.032467332820907, "grad_norm": 2.106436252593994, "learning_rate": 6.475409836065574e-05, "loss": 5.8755, "step": 396 }, { "epoch": 0.032631309249295415, "grad_norm": 1.564982533454895, "learning_rate": 6.508196721311476e-05, "loss": 5.8482, "step": 398 }, { "epoch": 0.03279528567768383, "grad_norm": 1.3622092008590698, "learning_rate": 6.540983606557377e-05, "loss": 5.8217, "step": 400 }, { "epoch": 0.03295926210607225, "grad_norm": 2.422950029373169, "learning_rate": 6.57377049180328e-05, "loss": 5.8151, "step": 402 }, { "epoch": 0.03312323853446067, "grad_norm": 2.1505627632141113, "learning_rate": 6.606557377049181e-05, "loss": 5.8411, "step": 404 }, { "epoch": 0.03328721496284909, "grad_norm": 2.70080304145813, "learning_rate": 6.639344262295082e-05, "loss": 5.8123, "step": 406 }, { "epoch": 0.03345119139123751, "grad_norm": 2.5012848377227783, "learning_rate": 6.672131147540984e-05, "loss": 5.8724, "step": 408 }, { "epoch": 0.03361516781962593, "grad_norm": 1.424917459487915, "learning_rate": 6.704918032786885e-05, "loss": 5.7876, "step": 410 }, { "epoch": 0.03377914424801435, "grad_norm": 2.5553343296051025, "learning_rate": 6.737704918032786e-05, "loss": 5.7938, "step": 412 }, { "epoch": 0.03394312067640277, "grad_norm": 1.8178255558013916, "learning_rate": 6.770491803278689e-05, "loss": 5.8053, "step": 414 }, { "epoch": 0.034107097104791186, "grad_norm": 1.838123083114624, "learning_rate": 6.80327868852459e-05, "loss": 5.7617, "step": 416 }, { "epoch": 0.034271073533179604, "grad_norm": 2.1789233684539795, "learning_rate": 6.836065573770493e-05, "loss": 5.7346, "step": 418 }, { "epoch": 0.03443504996156802, "grad_norm": 1.6680387258529663, "learning_rate": 6.868852459016394e-05, "loss": 5.7538, "step": 420 }, { "epoch": 0.03459902638995645, "grad_norm": 1.8677077293395996, "learning_rate": 6.901639344262295e-05, "loss": 5.7391, "step": 422 }, { "epoch": 0.034763002818344865, "grad_norm": 1.6001735925674438, "learning_rate": 6.934426229508197e-05, "loss": 5.7602, "step": 424 }, { "epoch": 0.03492697924673328, "grad_norm": 1.8411906957626343, "learning_rate": 6.967213114754098e-05, "loss": 5.7353, "step": 426 }, { "epoch": 0.0350909556751217, "grad_norm": 1.8191148042678833, "learning_rate": 7e-05, "loss": 5.6915, "step": 428 }, { "epoch": 0.03525493210351012, "grad_norm": 1.5732213258743286, "learning_rate": 7.032786885245902e-05, "loss": 5.7435, "step": 430 }, { "epoch": 0.03541890853189854, "grad_norm": 1.490302562713623, "learning_rate": 7.065573770491803e-05, "loss": 5.7039, "step": 432 }, { "epoch": 0.035582884960286956, "grad_norm": 2.1343276500701904, "learning_rate": 7.098360655737706e-05, "loss": 5.6742, "step": 434 }, { "epoch": 0.03574686138867538, "grad_norm": 2.2556490898132324, "learning_rate": 7.131147540983607e-05, "loss": 5.68, "step": 436 }, { "epoch": 0.0359108378170638, "grad_norm": 1.9686386585235596, "learning_rate": 7.163934426229509e-05, "loss": 5.7184, "step": 438 }, { "epoch": 0.03607481424545222, "grad_norm": 1.8209033012390137, "learning_rate": 7.196721311475411e-05, "loss": 5.6744, "step": 440 }, { "epoch": 0.036238790673840636, "grad_norm": 1.4810411930084229, "learning_rate": 7.229508196721311e-05, "loss": 5.6897, "step": 442 }, { "epoch": 0.036402767102229054, "grad_norm": 1.438828706741333, "learning_rate": 7.262295081967213e-05, "loss": 5.7104, "step": 444 }, { "epoch": 0.03656674353061747, "grad_norm": 1.9264169931411743, "learning_rate": 7.295081967213115e-05, "loss": 5.6648, "step": 446 }, { "epoch": 0.03673071995900589, "grad_norm": 1.5463581085205078, "learning_rate": 7.327868852459016e-05, "loss": 5.6325, "step": 448 }, { "epoch": 0.03689469638739431, "grad_norm": 2.308321237564087, "learning_rate": 7.360655737704918e-05, "loss": 5.6288, "step": 450 }, { "epoch": 0.037058672815782734, "grad_norm": 1.6210638284683228, "learning_rate": 7.39344262295082e-05, "loss": 5.5833, "step": 452 }, { "epoch": 0.03722264924417115, "grad_norm": 1.9419602155685425, "learning_rate": 7.426229508196722e-05, "loss": 5.6552, "step": 454 }, { "epoch": 0.03738662567255957, "grad_norm": 1.5004584789276123, "learning_rate": 7.459016393442624e-05, "loss": 5.5908, "step": 456 }, { "epoch": 0.03755060210094799, "grad_norm": 1.7404072284698486, "learning_rate": 7.491803278688526e-05, "loss": 5.5853, "step": 458 }, { "epoch": 0.03771457852933641, "grad_norm": 2.1047239303588867, "learning_rate": 7.524590163934426e-05, "loss": 5.6346, "step": 460 }, { "epoch": 0.037878554957724825, "grad_norm": 2.2261962890625, "learning_rate": 7.557377049180328e-05, "loss": 5.6325, "step": 462 }, { "epoch": 0.03804253138611324, "grad_norm": 1.9876081943511963, "learning_rate": 7.59016393442623e-05, "loss": 5.5733, "step": 464 }, { "epoch": 0.03820650781450167, "grad_norm": 1.9988337755203247, "learning_rate": 7.622950819672131e-05, "loss": 5.5402, "step": 466 }, { "epoch": 0.038370484242890086, "grad_norm": 1.896393060684204, "learning_rate": 7.655737704918034e-05, "loss": 5.5694, "step": 468 }, { "epoch": 0.038534460671278505, "grad_norm": 1.8517329692840576, "learning_rate": 7.688524590163935e-05, "loss": 5.5517, "step": 470 }, { "epoch": 0.03869843709966692, "grad_norm": 2.0797197818756104, "learning_rate": 7.721311475409836e-05, "loss": 5.5111, "step": 472 }, { "epoch": 0.03886241352805534, "grad_norm": 1.4706847667694092, "learning_rate": 7.754098360655739e-05, "loss": 5.5535, "step": 474 }, { "epoch": 0.03902638995644376, "grad_norm": 1.4342091083526611, "learning_rate": 7.78688524590164e-05, "loss": 5.5338, "step": 476 }, { "epoch": 0.03919036638483218, "grad_norm": 1.520163893699646, "learning_rate": 7.819672131147541e-05, "loss": 5.4999, "step": 478 }, { "epoch": 0.0393543428132206, "grad_norm": 1.479134202003479, "learning_rate": 7.852459016393443e-05, "loss": 5.5316, "step": 480 }, { "epoch": 0.03951831924160902, "grad_norm": 1.9230724573135376, "learning_rate": 7.885245901639344e-05, "loss": 5.4864, "step": 482 }, { "epoch": 0.03968229566999744, "grad_norm": 1.561827301979065, "learning_rate": 7.918032786885247e-05, "loss": 5.5004, "step": 484 }, { "epoch": 0.03984627209838586, "grad_norm": 1.9881266355514526, "learning_rate": 7.950819672131148e-05, "loss": 5.5241, "step": 486 }, { "epoch": 0.040010248526774275, "grad_norm": 2.184860944747925, "learning_rate": 7.98360655737705e-05, "loss": 5.4425, "step": 488 }, { "epoch": 0.04017422495516269, "grad_norm": 1.7443156242370605, "learning_rate": 8.016393442622952e-05, "loss": 5.4779, "step": 490 }, { "epoch": 0.04033820138355111, "grad_norm": 1.4682708978652954, "learning_rate": 8.049180327868853e-05, "loss": 5.5347, "step": 492 }, { "epoch": 0.04050217781193954, "grad_norm": 1.3537533283233643, "learning_rate": 8.081967213114755e-05, "loss": 5.4484, "step": 494 }, { "epoch": 0.040666154240327955, "grad_norm": 2.4647626876831055, "learning_rate": 8.114754098360656e-05, "loss": 5.428, "step": 496 }, { "epoch": 0.04083013066871637, "grad_norm": 2.0095391273498535, "learning_rate": 8.147540983606557e-05, "loss": 5.4425, "step": 498 }, { "epoch": 0.04099410709710479, "grad_norm": 2.309438705444336, "learning_rate": 8.18032786885246e-05, "loss": 5.4524, "step": 500 }, { "epoch": 0.04115808352549321, "grad_norm": 1.7151856422424316, "learning_rate": 8.213114754098361e-05, "loss": 5.4337, "step": 502 }, { "epoch": 0.04132205995388163, "grad_norm": 1.7919552326202393, "learning_rate": 8.245901639344262e-05, "loss": 5.4316, "step": 504 }, { "epoch": 0.041486036382270046, "grad_norm": 2.1312031745910645, "learning_rate": 8.278688524590165e-05, "loss": 5.445, "step": 506 }, { "epoch": 0.04165001281065847, "grad_norm": 1.994307518005371, "learning_rate": 8.311475409836066e-05, "loss": 5.3947, "step": 508 }, { "epoch": 0.04181398923904689, "grad_norm": 1.912011981010437, "learning_rate": 8.344262295081968e-05, "loss": 5.358, "step": 510 }, { "epoch": 0.04197796566743531, "grad_norm": 2.522435188293457, "learning_rate": 8.377049180327869e-05, "loss": 5.4445, "step": 512 }, { "epoch": 0.042141942095823726, "grad_norm": 1.8543167114257812, "learning_rate": 8.40983606557377e-05, "loss": 5.4107, "step": 514 }, { "epoch": 0.042305918524212144, "grad_norm": 2.0634872913360596, "learning_rate": 8.442622950819673e-05, "loss": 5.3435, "step": 516 }, { "epoch": 0.04246989495260056, "grad_norm": 1.563451886177063, "learning_rate": 8.475409836065574e-05, "loss": 5.3955, "step": 518 }, { "epoch": 0.04263387138098898, "grad_norm": 1.305403709411621, "learning_rate": 8.508196721311476e-05, "loss": 5.3148, "step": 520 }, { "epoch": 0.042797847809377405, "grad_norm": 1.9041563272476196, "learning_rate": 8.540983606557378e-05, "loss": 5.3892, "step": 522 }, { "epoch": 0.042961824237765824, "grad_norm": 1.7804834842681885, "learning_rate": 8.57377049180328e-05, "loss": 5.3762, "step": 524 }, { "epoch": 0.04312580066615424, "grad_norm": 1.959104299545288, "learning_rate": 8.606557377049181e-05, "loss": 5.3638, "step": 526 }, { "epoch": 0.04328977709454266, "grad_norm": 2.4005024433135986, "learning_rate": 8.639344262295082e-05, "loss": 5.3913, "step": 528 }, { "epoch": 0.04345375352293108, "grad_norm": 1.8313933610916138, "learning_rate": 8.672131147540983e-05, "loss": 5.3175, "step": 530 }, { "epoch": 0.043617729951319496, "grad_norm": 1.708949327468872, "learning_rate": 8.704918032786885e-05, "loss": 5.3506, "step": 532 }, { "epoch": 0.043781706379707915, "grad_norm": 1.9135140180587769, "learning_rate": 8.737704918032787e-05, "loss": 5.3197, "step": 534 }, { "epoch": 0.04394568280809633, "grad_norm": 2.3220021724700928, "learning_rate": 8.770491803278689e-05, "loss": 5.3148, "step": 536 }, { "epoch": 0.04410965923648476, "grad_norm": 1.6934478282928467, "learning_rate": 8.803278688524591e-05, "loss": 5.3818, "step": 538 }, { "epoch": 0.044273635664873176, "grad_norm": 1.633090615272522, "learning_rate": 8.836065573770493e-05, "loss": 5.2976, "step": 540 }, { "epoch": 0.044437612093261594, "grad_norm": 2.194674253463745, "learning_rate": 8.868852459016394e-05, "loss": 5.2909, "step": 542 }, { "epoch": 0.04460158852165001, "grad_norm": 1.8494622707366943, "learning_rate": 8.901639344262295e-05, "loss": 5.3178, "step": 544 }, { "epoch": 0.04476556495003843, "grad_norm": 1.523157000541687, "learning_rate": 8.934426229508197e-05, "loss": 5.2934, "step": 546 }, { "epoch": 0.04492954137842685, "grad_norm": 1.7002984285354614, "learning_rate": 8.967213114754098e-05, "loss": 5.3107, "step": 548 }, { "epoch": 0.04509351780681527, "grad_norm": 1.9046440124511719, "learning_rate": 9e-05, "loss": 5.3225, "step": 550 }, { "epoch": 0.04525749423520369, "grad_norm": 1.8026628494262695, "learning_rate": 9.032786885245902e-05, "loss": 5.3129, "step": 552 }, { "epoch": 0.04542147066359211, "grad_norm": 1.6067262887954712, "learning_rate": 9.065573770491805e-05, "loss": 5.2214, "step": 554 }, { "epoch": 0.04558544709198053, "grad_norm": 1.669403314590454, "learning_rate": 9.098360655737706e-05, "loss": 5.274, "step": 556 }, { "epoch": 0.04574942352036895, "grad_norm": 1.7327196598052979, "learning_rate": 9.131147540983607e-05, "loss": 5.3007, "step": 558 }, { "epoch": 0.045913399948757365, "grad_norm": 1.4521604776382446, "learning_rate": 9.163934426229508e-05, "loss": 5.2284, "step": 560 }, { "epoch": 0.04607737637714578, "grad_norm": 1.5612112283706665, "learning_rate": 9.19672131147541e-05, "loss": 5.2094, "step": 562 }, { "epoch": 0.0462413528055342, "grad_norm": 2.2565908432006836, "learning_rate": 9.229508196721311e-05, "loss": 5.25, "step": 564 }, { "epoch": 0.046405329233922626, "grad_norm": 2.040969133377075, "learning_rate": 9.262295081967214e-05, "loss": 5.2399, "step": 566 }, { "epoch": 0.046569305662311045, "grad_norm": 1.8394721746444702, "learning_rate": 9.295081967213115e-05, "loss": 5.2433, "step": 568 }, { "epoch": 0.04673328209069946, "grad_norm": 2.2264137268066406, "learning_rate": 9.327868852459016e-05, "loss": 5.1826, "step": 570 }, { "epoch": 0.04689725851908788, "grad_norm": 1.537869930267334, "learning_rate": 9.360655737704919e-05, "loss": 5.2544, "step": 572 }, { "epoch": 0.0470612349474763, "grad_norm": 1.6794589757919312, "learning_rate": 9.39344262295082e-05, "loss": 5.2355, "step": 574 }, { "epoch": 0.04722521137586472, "grad_norm": 2.1024844646453857, "learning_rate": 9.426229508196722e-05, "loss": 5.2308, "step": 576 }, { "epoch": 0.047389187804253136, "grad_norm": 1.6713175773620605, "learning_rate": 9.459016393442623e-05, "loss": 5.1545, "step": 578 }, { "epoch": 0.04755316423264156, "grad_norm": 1.6628456115722656, "learning_rate": 9.491803278688524e-05, "loss": 5.1741, "step": 580 }, { "epoch": 0.04771714066102998, "grad_norm": 1.4492676258087158, "learning_rate": 9.524590163934427e-05, "loss": 5.1566, "step": 582 }, { "epoch": 0.0478811170894184, "grad_norm": 1.794235110282898, "learning_rate": 9.557377049180328e-05, "loss": 5.1699, "step": 584 }, { "epoch": 0.048045093517806815, "grad_norm": 1.934901475906372, "learning_rate": 9.59016393442623e-05, "loss": 5.1533, "step": 586 }, { "epoch": 0.048209069946195234, "grad_norm": 1.2630641460418701, "learning_rate": 9.622950819672132e-05, "loss": 5.1782, "step": 588 }, { "epoch": 0.04837304637458365, "grad_norm": 1.4576668739318848, "learning_rate": 9.655737704918033e-05, "loss": 5.1815, "step": 590 }, { "epoch": 0.04853702280297207, "grad_norm": 1.842677354812622, "learning_rate": 9.688524590163936e-05, "loss": 5.1813, "step": 592 }, { "epoch": 0.048700999231360495, "grad_norm": 1.393120288848877, "learning_rate": 9.721311475409836e-05, "loss": 5.2008, "step": 594 }, { "epoch": 0.04886497565974891, "grad_norm": 1.789939522743225, "learning_rate": 9.754098360655737e-05, "loss": 5.113, "step": 596 }, { "epoch": 0.04902895208813733, "grad_norm": 1.8867571353912354, "learning_rate": 9.78688524590164e-05, "loss": 5.1684, "step": 598 }, { "epoch": 0.04919292851652575, "grad_norm": 1.278130292892456, "learning_rate": 9.819672131147541e-05, "loss": 5.0933, "step": 600 }, { "epoch": 0.04935690494491417, "grad_norm": 1.636001467704773, "learning_rate": 9.852459016393443e-05, "loss": 5.1324, "step": 602 }, { "epoch": 0.049520881373302586, "grad_norm": 1.7511135339736938, "learning_rate": 9.885245901639345e-05, "loss": 5.1855, "step": 604 }, { "epoch": 0.049684857801691004, "grad_norm": 1.5389798879623413, "learning_rate": 9.918032786885247e-05, "loss": 5.0774, "step": 606 }, { "epoch": 0.04984883423007943, "grad_norm": 1.466962218284607, "learning_rate": 9.950819672131148e-05, "loss": 5.0469, "step": 608 }, { "epoch": 0.05001281065846785, "grad_norm": 1.6687493324279785, "learning_rate": 9.98360655737705e-05, "loss": 5.1153, "step": 610 }, { "epoch": 0.050176787086856266, "grad_norm": 2.197819232940674, "learning_rate": 9.999999816220216e-05, "loss": 5.1082, "step": 612 }, { "epoch": 0.050340763515244684, "grad_norm": 1.3717740774154663, "learning_rate": 9.999998345982023e-05, "loss": 5.0569, "step": 614 }, { "epoch": 0.0505047399436331, "grad_norm": 1.4402227401733398, "learning_rate": 9.999995405506069e-05, "loss": 5.047, "step": 616 }, { "epoch": 0.05066871637202152, "grad_norm": 1.6121858358383179, "learning_rate": 9.999990994793218e-05, "loss": 5.1063, "step": 618 }, { "epoch": 0.05083269280040994, "grad_norm": 1.4336620569229126, "learning_rate": 9.999985113844767e-05, "loss": 5.095, "step": 620 }, { "epoch": 0.050996669228798364, "grad_norm": 1.3124680519104004, "learning_rate": 9.999977762662447e-05, "loss": 5.0633, "step": 622 }, { "epoch": 0.05116064565718678, "grad_norm": 1.4217371940612793, "learning_rate": 9.999968941248419e-05, "loss": 5.0547, "step": 624 }, { "epoch": 0.0513246220855752, "grad_norm": 1.182154655456543, "learning_rate": 9.999958649605275e-05, "loss": 5.1194, "step": 626 }, { "epoch": 0.05148859851396362, "grad_norm": 1.8198816776275635, "learning_rate": 9.999946887736043e-05, "loss": 5.0367, "step": 628 }, { "epoch": 0.051652574942352036, "grad_norm": 1.8673418760299683, "learning_rate": 9.99993365564418e-05, "loss": 5.0187, "step": 630 }, { "epoch": 0.051816551370740455, "grad_norm": 1.7428374290466309, "learning_rate": 9.99991895333358e-05, "loss": 5.0675, "step": 632 }, { "epoch": 0.05198052779912887, "grad_norm": 1.8670332431793213, "learning_rate": 9.999902780808563e-05, "loss": 5.0166, "step": 634 }, { "epoch": 0.05214450422751729, "grad_norm": 1.2812050580978394, "learning_rate": 9.999885138073886e-05, "loss": 5.059, "step": 636 }, { "epoch": 0.052308480655905716, "grad_norm": 1.7798051834106445, "learning_rate": 9.999866025134737e-05, "loss": 5.0736, "step": 638 }, { "epoch": 0.052472457084294134, "grad_norm": 1.9266560077667236, "learning_rate": 9.999845441996734e-05, "loss": 4.9463, "step": 640 }, { "epoch": 0.05263643351268255, "grad_norm": 1.7603977918624878, "learning_rate": 9.999823388665932e-05, "loss": 4.9446, "step": 642 }, { "epoch": 0.05280040994107097, "grad_norm": 1.383430004119873, "learning_rate": 9.999799865148816e-05, "loss": 5.0142, "step": 644 }, { "epoch": 0.05296438636945939, "grad_norm": 1.325101613998413, "learning_rate": 9.9997748714523e-05, "loss": 4.9583, "step": 646 }, { "epoch": 0.05312836279784781, "grad_norm": 1.3705639839172363, "learning_rate": 9.999748407583736e-05, "loss": 4.9762, "step": 648 }, { "epoch": 0.053292339226236225, "grad_norm": 1.5312895774841309, "learning_rate": 9.999720473550905e-05, "loss": 4.9743, "step": 650 }, { "epoch": 0.05345631565462465, "grad_norm": 1.5449235439300537, "learning_rate": 9.999691069362019e-05, "loss": 5.0046, "step": 652 }, { "epoch": 0.05362029208301307, "grad_norm": 1.4353389739990234, "learning_rate": 9.999660195025727e-05, "loss": 4.9794, "step": 654 }, { "epoch": 0.05378426851140149, "grad_norm": 1.7781524658203125, "learning_rate": 9.999627850551108e-05, "loss": 5.0089, "step": 656 }, { "epoch": 0.053948244939789905, "grad_norm": 1.5605298280715942, "learning_rate": 9.999594035947668e-05, "loss": 4.9213, "step": 658 }, { "epoch": 0.05411222136817832, "grad_norm": 1.1541820764541626, "learning_rate": 9.999558751225355e-05, "loss": 4.9311, "step": 660 }, { "epoch": 0.05427619779656674, "grad_norm": 1.2745718955993652, "learning_rate": 9.999521996394544e-05, "loss": 4.9893, "step": 662 }, { "epoch": 0.05444017422495516, "grad_norm": 1.3617721796035767, "learning_rate": 9.999483771466041e-05, "loss": 4.8475, "step": 664 }, { "epoch": 0.054604150653343585, "grad_norm": 1.6262531280517578, "learning_rate": 9.999444076451086e-05, "loss": 4.944, "step": 666 }, { "epoch": 0.054768127081732, "grad_norm": 1.830307960510254, "learning_rate": 9.99940291136135e-05, "loss": 4.9142, "step": 668 }, { "epoch": 0.05493210351012042, "grad_norm": 1.6637526750564575, "learning_rate": 9.999360276208942e-05, "loss": 4.9335, "step": 670 }, { "epoch": 0.05509607993850884, "grad_norm": 1.5176993608474731, "learning_rate": 9.999316171006395e-05, "loss": 4.8994, "step": 672 }, { "epoch": 0.05526005636689726, "grad_norm": 1.4975998401641846, "learning_rate": 9.999270595766677e-05, "loss": 4.9188, "step": 674 }, { "epoch": 0.055424032795285676, "grad_norm": 1.371583342552185, "learning_rate": 9.999223550503191e-05, "loss": 4.9602, "step": 676 }, { "epoch": 0.055588009223674094, "grad_norm": 1.2749911546707153, "learning_rate": 9.999175035229774e-05, "loss": 4.8472, "step": 678 }, { "epoch": 0.05575198565206252, "grad_norm": 1.6738046407699585, "learning_rate": 9.999125049960687e-05, "loss": 4.8499, "step": 680 }, { "epoch": 0.05591596208045094, "grad_norm": 1.3173338174819946, "learning_rate": 9.999073594710629e-05, "loss": 4.8861, "step": 682 }, { "epoch": 0.056079938508839355, "grad_norm": 1.332027792930603, "learning_rate": 9.999020669494731e-05, "loss": 4.9713, "step": 684 }, { "epoch": 0.056243914937227774, "grad_norm": 1.3671154975891113, "learning_rate": 9.998966274328557e-05, "loss": 4.888, "step": 686 }, { "epoch": 0.05640789136561619, "grad_norm": 1.5266999006271362, "learning_rate": 9.998910409228097e-05, "loss": 4.891, "step": 688 }, { "epoch": 0.05657186779400461, "grad_norm": 1.5487200021743774, "learning_rate": 9.998853074209785e-05, "loss": 4.8246, "step": 690 }, { "epoch": 0.05673584422239303, "grad_norm": 1.2039135694503784, "learning_rate": 9.998794269290474e-05, "loss": 4.837, "step": 692 }, { "epoch": 0.05689982065078145, "grad_norm": 1.6249526739120483, "learning_rate": 9.998733994487458e-05, "loss": 4.8498, "step": 694 }, { "epoch": 0.05706379707916987, "grad_norm": 1.849284291267395, "learning_rate": 9.998672249818461e-05, "loss": 4.8735, "step": 696 }, { "epoch": 0.05722777350755829, "grad_norm": 1.22111976146698, "learning_rate": 9.998609035301638e-05, "loss": 4.8487, "step": 698 }, { "epoch": 0.05739174993594671, "grad_norm": 1.4855297803878784, "learning_rate": 9.998544350955578e-05, "loss": 4.8222, "step": 700 }, { "epoch": 0.057555726364335126, "grad_norm": 1.0973803997039795, "learning_rate": 9.998478196799301e-05, "loss": 4.8494, "step": 702 }, { "epoch": 0.057719702792723544, "grad_norm": 1.3077099323272705, "learning_rate": 9.998410572852259e-05, "loss": 4.9111, "step": 704 }, { "epoch": 0.05788367922111196, "grad_norm": 1.3853782415390015, "learning_rate": 9.998341479134337e-05, "loss": 4.8096, "step": 706 }, { "epoch": 0.05804765564950039, "grad_norm": 1.0545806884765625, "learning_rate": 9.998270915665852e-05, "loss": 4.8357, "step": 708 }, { "epoch": 0.058211632077888806, "grad_norm": 1.1127121448516846, "learning_rate": 9.998198882467552e-05, "loss": 4.7969, "step": 710 }, { "epoch": 0.058375608506277224, "grad_norm": 1.3986823558807373, "learning_rate": 9.998125379560618e-05, "loss": 4.851, "step": 712 }, { "epoch": 0.05853958493466564, "grad_norm": 1.2785799503326416, "learning_rate": 9.998050406966668e-05, "loss": 4.7953, "step": 714 }, { "epoch": 0.05870356136305406, "grad_norm": 1.2151364088058472, "learning_rate": 9.99797396470774e-05, "loss": 4.7614, "step": 716 }, { "epoch": 0.05886753779144248, "grad_norm": 1.221731424331665, "learning_rate": 9.997896052806319e-05, "loss": 4.7832, "step": 718 }, { "epoch": 0.0590315142198309, "grad_norm": 1.2028709650039673, "learning_rate": 9.99781667128531e-05, "loss": 4.7496, "step": 720 }, { "epoch": 0.05919549064821932, "grad_norm": 1.2175623178482056, "learning_rate": 9.997735820168055e-05, "loss": 4.764, "step": 722 }, { "epoch": 0.05935946707660774, "grad_norm": 1.2626240253448486, "learning_rate": 9.99765349947833e-05, "loss": 4.7384, "step": 724 }, { "epoch": 0.05952344350499616, "grad_norm": 1.1986509561538696, "learning_rate": 9.997569709240339e-05, "loss": 4.771, "step": 726 }, { "epoch": 0.059687419933384576, "grad_norm": 1.3522735834121704, "learning_rate": 9.997484449478724e-05, "loss": 4.8, "step": 728 }, { "epoch": 0.059851396361772995, "grad_norm": 1.4882395267486572, "learning_rate": 9.997397720218553e-05, "loss": 4.7719, "step": 730 }, { "epoch": 0.06001537279016141, "grad_norm": 1.54912531375885, "learning_rate": 9.99730952148533e-05, "loss": 4.796, "step": 732 }, { "epoch": 0.06017934921854983, "grad_norm": 1.442063570022583, "learning_rate": 9.997219853304986e-05, "loss": 4.7315, "step": 734 }, { "epoch": 0.06034332564693825, "grad_norm": 1.193917155265808, "learning_rate": 9.997128715703892e-05, "loss": 4.7235, "step": 736 }, { "epoch": 0.060507302075326674, "grad_norm": 1.3164023160934448, "learning_rate": 9.997036108708843e-05, "loss": 4.7216, "step": 738 }, { "epoch": 0.06067127850371509, "grad_norm": 1.4200711250305176, "learning_rate": 9.996942032347074e-05, "loss": 4.7607, "step": 740 }, { "epoch": 0.06083525493210351, "grad_norm": 1.6272212266921997, "learning_rate": 9.996846486646245e-05, "loss": 4.7334, "step": 742 }, { "epoch": 0.06099923136049193, "grad_norm": 1.371127963066101, "learning_rate": 9.996749471634452e-05, "loss": 4.7856, "step": 744 }, { "epoch": 0.06116320778888035, "grad_norm": 1.3041075468063354, "learning_rate": 9.996650987340222e-05, "loss": 4.7614, "step": 746 }, { "epoch": 0.061327184217268765, "grad_norm": 1.3496429920196533, "learning_rate": 9.996551033792514e-05, "loss": 4.7778, "step": 748 }, { "epoch": 0.061491160645657184, "grad_norm": 1.433722734451294, "learning_rate": 9.996449611020719e-05, "loss": 4.7239, "step": 750 }, { "epoch": 0.06165513707404561, "grad_norm": 1.6855007410049438, "learning_rate": 9.996346719054659e-05, "loss": 4.8127, "step": 752 }, { "epoch": 0.06181911350243403, "grad_norm": 1.6095404624938965, "learning_rate": 9.996242357924591e-05, "loss": 4.718, "step": 754 }, { "epoch": 0.061983089930822445, "grad_norm": 1.3969067335128784, "learning_rate": 9.996136527661202e-05, "loss": 4.6984, "step": 756 }, { "epoch": 0.06214706635921086, "grad_norm": 1.154539704322815, "learning_rate": 9.99602922829561e-05, "loss": 4.7393, "step": 758 }, { "epoch": 0.06231104278759928, "grad_norm": 1.1856812238693237, "learning_rate": 9.995920459859367e-05, "loss": 4.6979, "step": 760 }, { "epoch": 0.0624750192159877, "grad_norm": 1.17214834690094, "learning_rate": 9.995810222384454e-05, "loss": 4.6376, "step": 762 }, { "epoch": 0.06263899564437612, "grad_norm": 1.2906792163848877, "learning_rate": 9.995698515903289e-05, "loss": 4.7684, "step": 764 }, { "epoch": 0.06280297207276454, "grad_norm": 1.3379504680633545, "learning_rate": 9.995585340448719e-05, "loss": 4.6261, "step": 766 }, { "epoch": 0.06296694850115296, "grad_norm": 1.4633430242538452, "learning_rate": 9.995470696054021e-05, "loss": 4.6889, "step": 768 }, { "epoch": 0.06313092492954138, "grad_norm": 1.4456816911697388, "learning_rate": 9.995354582752907e-05, "loss": 4.644, "step": 770 }, { "epoch": 0.0632949013579298, "grad_norm": 1.2467187643051147, "learning_rate": 9.995237000579519e-05, "loss": 4.613, "step": 772 }, { "epoch": 0.06345887778631822, "grad_norm": 1.2840498685836792, "learning_rate": 9.995117949568433e-05, "loss": 4.6492, "step": 774 }, { "epoch": 0.06362285421470663, "grad_norm": 0.9967436194419861, "learning_rate": 9.994997429754656e-05, "loss": 4.6299, "step": 776 }, { "epoch": 0.06378683064309505, "grad_norm": 1.2300001382827759, "learning_rate": 9.994875441173623e-05, "loss": 4.6396, "step": 778 }, { "epoch": 0.06395080707148347, "grad_norm": 1.225391149520874, "learning_rate": 9.99475198386121e-05, "loss": 4.6316, "step": 780 }, { "epoch": 0.06411478349987189, "grad_norm": 1.148902177810669, "learning_rate": 9.994627057853714e-05, "loss": 4.6019, "step": 782 }, { "epoch": 0.0642787599282603, "grad_norm": 1.3424532413482666, "learning_rate": 9.994500663187874e-05, "loss": 4.6052, "step": 784 }, { "epoch": 0.06444273635664872, "grad_norm": 1.1050846576690674, "learning_rate": 9.99437279990085e-05, "loss": 4.5606, "step": 786 }, { "epoch": 0.06460671278503716, "grad_norm": 1.2558073997497559, "learning_rate": 9.994243468030247e-05, "loss": 4.5245, "step": 788 }, { "epoch": 0.06477068921342558, "grad_norm": 1.366250991821289, "learning_rate": 9.99411266761409e-05, "loss": 4.6662, "step": 790 }, { "epoch": 0.064934665641814, "grad_norm": 1.0933619737625122, "learning_rate": 9.993980398690843e-05, "loss": 4.5972, "step": 792 }, { "epoch": 0.06509864207020241, "grad_norm": 1.0256333351135254, "learning_rate": 9.993846661299396e-05, "loss": 4.5935, "step": 794 }, { "epoch": 0.06526261849859083, "grad_norm": 0.9090489149093628, "learning_rate": 9.993711455479077e-05, "loss": 4.5371, "step": 796 }, { "epoch": 0.06542659492697925, "grad_norm": 1.3676148653030396, "learning_rate": 9.993574781269644e-05, "loss": 4.5959, "step": 798 }, { "epoch": 0.06559057135536767, "grad_norm": 1.1888647079467773, "learning_rate": 9.993436638711284e-05, "loss": 4.6145, "step": 800 }, { "epoch": 0.06575454778375608, "grad_norm": 0.883764386177063, "learning_rate": 9.993297027844616e-05, "loss": 4.5562, "step": 802 }, { "epoch": 0.0659185242121445, "grad_norm": 0.969134509563446, "learning_rate": 9.993155948710694e-05, "loss": 4.6248, "step": 804 }, { "epoch": 0.06608250064053292, "grad_norm": 0.9472710490226746, "learning_rate": 9.993013401351002e-05, "loss": 4.5769, "step": 806 }, { "epoch": 0.06624647706892134, "grad_norm": 1.162370204925537, "learning_rate": 9.992869385807455e-05, "loss": 4.5947, "step": 808 }, { "epoch": 0.06641045349730976, "grad_norm": 1.0858770608901978, "learning_rate": 9.992723902122403e-05, "loss": 4.5491, "step": 810 }, { "epoch": 0.06657442992569818, "grad_norm": 1.0923309326171875, "learning_rate": 9.992576950338621e-05, "loss": 4.5982, "step": 812 }, { "epoch": 0.0667384063540866, "grad_norm": 1.1544495820999146, "learning_rate": 9.992428530499323e-05, "loss": 4.5675, "step": 814 }, { "epoch": 0.06690238278247503, "grad_norm": 1.1099858283996582, "learning_rate": 9.99227864264815e-05, "loss": 4.5893, "step": 816 }, { "epoch": 0.06706635921086344, "grad_norm": 1.3164221048355103, "learning_rate": 9.992127286829176e-05, "loss": 4.5993, "step": 818 }, { "epoch": 0.06723033563925186, "grad_norm": 1.2815008163452148, "learning_rate": 9.991974463086908e-05, "loss": 4.5687, "step": 820 }, { "epoch": 0.06739431206764028, "grad_norm": 1.104801058769226, "learning_rate": 9.991820171466284e-05, "loss": 4.5231, "step": 822 }, { "epoch": 0.0675582884960287, "grad_norm": 1.2623943090438843, "learning_rate": 9.99166441201267e-05, "loss": 4.5498, "step": 824 }, { "epoch": 0.06772226492441712, "grad_norm": 1.3679825067520142, "learning_rate": 9.991507184771869e-05, "loss": 4.5317, "step": 826 }, { "epoch": 0.06788624135280553, "grad_norm": 1.1458314657211304, "learning_rate": 9.991348489790113e-05, "loss": 4.4599, "step": 828 }, { "epoch": 0.06805021778119395, "grad_norm": 1.1556310653686523, "learning_rate": 9.991188327114068e-05, "loss": 4.5466, "step": 830 }, { "epoch": 0.06821419420958237, "grad_norm": 1.2080873250961304, "learning_rate": 9.991026696790825e-05, "loss": 4.5734, "step": 832 }, { "epoch": 0.06837817063797079, "grad_norm": 1.3832921981811523, "learning_rate": 9.990863598867914e-05, "loss": 4.5367, "step": 834 }, { "epoch": 0.06854214706635921, "grad_norm": 0.9393659234046936, "learning_rate": 9.990699033393293e-05, "loss": 4.5072, "step": 836 }, { "epoch": 0.06870612349474763, "grad_norm": 0.9582691788673401, "learning_rate": 9.990533000415352e-05, "loss": 4.5046, "step": 838 }, { "epoch": 0.06887009992313604, "grad_norm": 1.2170627117156982, "learning_rate": 9.990365499982912e-05, "loss": 4.4628, "step": 840 }, { "epoch": 0.06903407635152446, "grad_norm": 1.037985920906067, "learning_rate": 9.990196532145227e-05, "loss": 4.5521, "step": 842 }, { "epoch": 0.0691980527799129, "grad_norm": 0.9628452658653259, "learning_rate": 9.990026096951981e-05, "loss": 4.5028, "step": 844 }, { "epoch": 0.06936202920830131, "grad_norm": 1.0910757780075073, "learning_rate": 9.98985419445329e-05, "loss": 4.5354, "step": 846 }, { "epoch": 0.06952600563668973, "grad_norm": 1.5108650922775269, "learning_rate": 9.989680824699703e-05, "loss": 4.5267, "step": 848 }, { "epoch": 0.06968998206507815, "grad_norm": 1.214145541191101, "learning_rate": 9.989505987742198e-05, "loss": 4.5271, "step": 850 }, { "epoch": 0.06985395849346657, "grad_norm": 1.2133456468582153, "learning_rate": 9.989329683632185e-05, "loss": 4.5195, "step": 852 }, { "epoch": 0.07001793492185499, "grad_norm": 1.4688955545425415, "learning_rate": 9.989151912421503e-05, "loss": 4.449, "step": 854 }, { "epoch": 0.0701819113502434, "grad_norm": 0.9931148290634155, "learning_rate": 9.988972674162432e-05, "loss": 4.4952, "step": 856 }, { "epoch": 0.07034588777863182, "grad_norm": 1.1149705648422241, "learning_rate": 9.988791968907671e-05, "loss": 4.4773, "step": 858 }, { "epoch": 0.07050986420702024, "grad_norm": 1.125609278678894, "learning_rate": 9.98860979671036e-05, "loss": 4.4371, "step": 860 }, { "epoch": 0.07067384063540866, "grad_norm": 0.985559344291687, "learning_rate": 9.988426157624063e-05, "loss": 4.4348, "step": 862 }, { "epoch": 0.07083781706379708, "grad_norm": 0.937849223613739, "learning_rate": 9.988241051702778e-05, "loss": 4.4481, "step": 864 }, { "epoch": 0.0710017934921855, "grad_norm": 1.4474624395370483, "learning_rate": 9.98805447900094e-05, "loss": 4.5007, "step": 866 }, { "epoch": 0.07116576992057391, "grad_norm": 0.9758108854293823, "learning_rate": 9.987866439573403e-05, "loss": 4.448, "step": 868 }, { "epoch": 0.07132974634896233, "grad_norm": 1.147123098373413, "learning_rate": 9.987676933475467e-05, "loss": 4.3825, "step": 870 }, { "epoch": 0.07149372277735076, "grad_norm": 1.2334039211273193, "learning_rate": 9.98748596076285e-05, "loss": 4.4577, "step": 872 }, { "epoch": 0.07165769920573918, "grad_norm": 1.1643340587615967, "learning_rate": 9.987293521491711e-05, "loss": 4.4511, "step": 874 }, { "epoch": 0.0718216756341276, "grad_norm": 0.9636064767837524, "learning_rate": 9.987099615718634e-05, "loss": 4.4068, "step": 876 }, { "epoch": 0.07198565206251602, "grad_norm": 1.0015913248062134, "learning_rate": 9.986904243500637e-05, "loss": 4.4082, "step": 878 }, { "epoch": 0.07214962849090444, "grad_norm": 1.003480076789856, "learning_rate": 9.98670740489517e-05, "loss": 4.368, "step": 880 }, { "epoch": 0.07231360491929285, "grad_norm": 1.0843322277069092, "learning_rate": 9.98650909996011e-05, "loss": 4.4389, "step": 882 }, { "epoch": 0.07247758134768127, "grad_norm": 1.0302671194076538, "learning_rate": 9.986309328753772e-05, "loss": 4.5192, "step": 884 }, { "epoch": 0.07264155777606969, "grad_norm": 1.0695348978042603, "learning_rate": 9.986108091334896e-05, "loss": 4.4382, "step": 886 }, { "epoch": 0.07280553420445811, "grad_norm": 0.8926975131034851, "learning_rate": 9.985905387762656e-05, "loss": 4.3326, "step": 888 }, { "epoch": 0.07296951063284653, "grad_norm": 0.8886080384254456, "learning_rate": 9.985701218096655e-05, "loss": 4.3415, "step": 890 }, { "epoch": 0.07313348706123494, "grad_norm": 1.0607457160949707, "learning_rate": 9.985495582396931e-05, "loss": 4.3892, "step": 892 }, { "epoch": 0.07329746348962336, "grad_norm": 0.9411015510559082, "learning_rate": 9.985288480723949e-05, "loss": 4.4194, "step": 894 }, { "epoch": 0.07346143991801178, "grad_norm": 1.1800034046173096, "learning_rate": 9.985079913138607e-05, "loss": 4.2992, "step": 896 }, { "epoch": 0.0736254163464002, "grad_norm": 0.9723591804504395, "learning_rate": 9.984869879702235e-05, "loss": 4.3078, "step": 898 }, { "epoch": 0.07378939277478862, "grad_norm": 1.0988435745239258, "learning_rate": 9.98465838047659e-05, "loss": 4.3855, "step": 900 }, { "epoch": 0.07395336920317705, "grad_norm": 1.3330668210983276, "learning_rate": 9.984445415523866e-05, "loss": 4.326, "step": 902 }, { "epoch": 0.07411734563156547, "grad_norm": 1.271883249282837, "learning_rate": 9.984230984906684e-05, "loss": 4.414, "step": 904 }, { "epoch": 0.07428132205995389, "grad_norm": 1.2705031633377075, "learning_rate": 9.984015088688094e-05, "loss": 4.3481, "step": 906 }, { "epoch": 0.0744452984883423, "grad_norm": 1.1692169904708862, "learning_rate": 9.983797726931585e-05, "loss": 4.3656, "step": 908 }, { "epoch": 0.07460927491673072, "grad_norm": 1.557630181312561, "learning_rate": 9.983578899701068e-05, "loss": 4.3926, "step": 910 }, { "epoch": 0.07477325134511914, "grad_norm": 1.252167820930481, "learning_rate": 9.98335860706089e-05, "loss": 4.3261, "step": 912 }, { "epoch": 0.07493722777350756, "grad_norm": 1.0178437232971191, "learning_rate": 9.983136849075827e-05, "loss": 4.3705, "step": 914 }, { "epoch": 0.07510120420189598, "grad_norm": 1.0884320735931396, "learning_rate": 9.982913625811086e-05, "loss": 4.3286, "step": 916 }, { "epoch": 0.0752651806302844, "grad_norm": 1.018403172492981, "learning_rate": 9.982688937332305e-05, "loss": 4.3491, "step": 918 }, { "epoch": 0.07542915705867281, "grad_norm": 1.3070770502090454, "learning_rate": 9.982462783705555e-05, "loss": 4.3412, "step": 920 }, { "epoch": 0.07559313348706123, "grad_norm": 0.8433274030685425, "learning_rate": 9.982235164997336e-05, "loss": 4.3234, "step": 922 }, { "epoch": 0.07575710991544965, "grad_norm": 1.3230133056640625, "learning_rate": 9.982006081274575e-05, "loss": 4.275, "step": 924 }, { "epoch": 0.07592108634383807, "grad_norm": 1.1820060014724731, "learning_rate": 9.981775532604637e-05, "loss": 4.2512, "step": 926 }, { "epoch": 0.07608506277222649, "grad_norm": 1.17715322971344, "learning_rate": 9.981543519055314e-05, "loss": 4.2675, "step": 928 }, { "epoch": 0.07624903920061492, "grad_norm": 0.9106295704841614, "learning_rate": 9.981310040694829e-05, "loss": 4.2771, "step": 930 }, { "epoch": 0.07641301562900334, "grad_norm": 1.2079112529754639, "learning_rate": 9.981075097591834e-05, "loss": 4.3381, "step": 932 }, { "epoch": 0.07657699205739175, "grad_norm": 1.199350118637085, "learning_rate": 9.980838689815414e-05, "loss": 4.3208, "step": 934 }, { "epoch": 0.07674096848578017, "grad_norm": 1.088350534439087, "learning_rate": 9.980600817435086e-05, "loss": 4.2439, "step": 936 }, { "epoch": 0.07690494491416859, "grad_norm": 1.0347201824188232, "learning_rate": 9.980361480520794e-05, "loss": 4.3169, "step": 938 }, { "epoch": 0.07706892134255701, "grad_norm": 1.3007529973983765, "learning_rate": 9.980120679142917e-05, "loss": 4.2441, "step": 940 }, { "epoch": 0.07723289777094543, "grad_norm": 0.9512838125228882, "learning_rate": 9.979878413372259e-05, "loss": 4.2474, "step": 942 }, { "epoch": 0.07739687419933385, "grad_norm": 1.170279622077942, "learning_rate": 9.979634683280059e-05, "loss": 4.2459, "step": 944 }, { "epoch": 0.07756085062772226, "grad_norm": 0.9993996620178223, "learning_rate": 9.979389488937984e-05, "loss": 4.2862, "step": 946 }, { "epoch": 0.07772482705611068, "grad_norm": 0.8914362788200378, "learning_rate": 9.979142830418134e-05, "loss": 4.2872, "step": 948 }, { "epoch": 0.0778888034844991, "grad_norm": 1.153712511062622, "learning_rate": 9.978894707793039e-05, "loss": 4.2023, "step": 950 }, { "epoch": 0.07805277991288752, "grad_norm": 1.2968518733978271, "learning_rate": 9.978645121135659e-05, "loss": 4.1831, "step": 952 }, { "epoch": 0.07821675634127594, "grad_norm": 1.2519747018814087, "learning_rate": 9.978394070519383e-05, "loss": 4.2492, "step": 954 }, { "epoch": 0.07838073276966435, "grad_norm": 1.1906182765960693, "learning_rate": 9.978141556018031e-05, "loss": 4.2596, "step": 956 }, { "epoch": 0.07854470919805279, "grad_norm": 1.073822021484375, "learning_rate": 9.977887577705857e-05, "loss": 4.2341, "step": 958 }, { "epoch": 0.0787086856264412, "grad_norm": 1.1360152959823608, "learning_rate": 9.977632135657543e-05, "loss": 4.2389, "step": 960 }, { "epoch": 0.07887266205482962, "grad_norm": 1.0772216320037842, "learning_rate": 9.977375229948195e-05, "loss": 4.1266, "step": 962 }, { "epoch": 0.07903663848321804, "grad_norm": 1.0052435398101807, "learning_rate": 9.977116860653363e-05, "loss": 4.2679, "step": 964 }, { "epoch": 0.07920061491160646, "grad_norm": 0.8987570405006409, "learning_rate": 9.976857027849019e-05, "loss": 4.2019, "step": 966 }, { "epoch": 0.07936459133999488, "grad_norm": 1.3094909191131592, "learning_rate": 9.97659573161156e-05, "loss": 4.152, "step": 968 }, { "epoch": 0.0795285677683833, "grad_norm": 1.0730571746826172, "learning_rate": 9.976332972017826e-05, "loss": 4.1829, "step": 970 }, { "epoch": 0.07969254419677171, "grad_norm": 1.0100387334823608, "learning_rate": 9.976068749145078e-05, "loss": 4.1619, "step": 972 }, { "epoch": 0.07985652062516013, "grad_norm": 0.9166683554649353, "learning_rate": 9.97580306307101e-05, "loss": 4.2055, "step": 974 }, { "epoch": 0.08002049705354855, "grad_norm": 0.9950259327888489, "learning_rate": 9.975535913873748e-05, "loss": 4.2696, "step": 976 }, { "epoch": 0.08018447348193697, "grad_norm": 1.174676775932312, "learning_rate": 9.975267301631846e-05, "loss": 4.257, "step": 978 }, { "epoch": 0.08034844991032539, "grad_norm": 0.9548665881156921, "learning_rate": 9.974997226424288e-05, "loss": 4.1695, "step": 980 }, { "epoch": 0.0805124263387138, "grad_norm": 1.1128441095352173, "learning_rate": 9.974725688330489e-05, "loss": 4.158, "step": 982 }, { "epoch": 0.08067640276710222, "grad_norm": 1.0748997926712036, "learning_rate": 9.974452687430293e-05, "loss": 4.1652, "step": 984 }, { "epoch": 0.08084037919549064, "grad_norm": 1.1463944911956787, "learning_rate": 9.974178223803981e-05, "loss": 4.1962, "step": 986 }, { "epoch": 0.08100435562387907, "grad_norm": 1.18082857131958, "learning_rate": 9.97390229753225e-05, "loss": 4.2016, "step": 988 }, { "epoch": 0.08116833205226749, "grad_norm": 1.0245002508163452, "learning_rate": 9.973624908696242e-05, "loss": 4.1244, "step": 990 }, { "epoch": 0.08133230848065591, "grad_norm": 1.0269415378570557, "learning_rate": 9.973346057377519e-05, "loss": 4.1954, "step": 992 }, { "epoch": 0.08149628490904433, "grad_norm": 1.3380223512649536, "learning_rate": 9.973065743658078e-05, "loss": 4.1392, "step": 994 }, { "epoch": 0.08166026133743275, "grad_norm": 1.0681672096252441, "learning_rate": 9.972783967620345e-05, "loss": 4.0451, "step": 996 }, { "epoch": 0.08182423776582116, "grad_norm": 1.1791712045669556, "learning_rate": 9.972500729347176e-05, "loss": 4.1513, "step": 998 }, { "epoch": 0.08198821419420958, "grad_norm": 0.9805436134338379, "learning_rate": 9.972216028921854e-05, "loss": 4.0942, "step": 1000 }, { "epoch": 0.082152190622598, "grad_norm": 1.2421460151672363, "learning_rate": 9.971929866428095e-05, "loss": 4.1216, "step": 1002 }, { "epoch": 0.08231616705098642, "grad_norm": 1.085977554321289, "learning_rate": 9.971642241950048e-05, "loss": 4.0897, "step": 1004 }, { "epoch": 0.08248014347937484, "grad_norm": 0.987576425075531, "learning_rate": 9.971353155572284e-05, "loss": 4.1011, "step": 1006 }, { "epoch": 0.08264411990776326, "grad_norm": 1.1075130701065063, "learning_rate": 9.97106260737981e-05, "loss": 4.1546, "step": 1008 }, { "epoch": 0.08280809633615167, "grad_norm": 1.0441356897354126, "learning_rate": 9.97077059745806e-05, "loss": 4.1124, "step": 1010 }, { "epoch": 0.08297207276454009, "grad_norm": 1.0685722827911377, "learning_rate": 9.970477125892902e-05, "loss": 4.1241, "step": 1012 }, { "epoch": 0.08313604919292851, "grad_norm": 0.9862858057022095, "learning_rate": 9.970182192770627e-05, "loss": 4.1194, "step": 1014 }, { "epoch": 0.08330002562131694, "grad_norm": 1.0223233699798584, "learning_rate": 9.96988579817796e-05, "loss": 4.1471, "step": 1016 }, { "epoch": 0.08346400204970536, "grad_norm": 1.0092227458953857, "learning_rate": 9.969587942202057e-05, "loss": 4.141, "step": 1018 }, { "epoch": 0.08362797847809378, "grad_norm": 0.9383386969566345, "learning_rate": 9.9692886249305e-05, "loss": 4.0195, "step": 1020 }, { "epoch": 0.0837919549064822, "grad_norm": 0.7819382548332214, "learning_rate": 9.968987846451305e-05, "loss": 4.0688, "step": 1022 }, { "epoch": 0.08395593133487061, "grad_norm": 0.9499008655548096, "learning_rate": 9.968685606852913e-05, "loss": 4.1559, "step": 1024 }, { "epoch": 0.08411990776325903, "grad_norm": 0.9793714880943298, "learning_rate": 9.968381906224195e-05, "loss": 4.1147, "step": 1026 }, { "epoch": 0.08428388419164745, "grad_norm": 0.8692449927330017, "learning_rate": 9.968076744654458e-05, "loss": 4.0808, "step": 1028 }, { "epoch": 0.08444786062003587, "grad_norm": 1.0884157419204712, "learning_rate": 9.967770122233431e-05, "loss": 4.0656, "step": 1030 }, { "epoch": 0.08461183704842429, "grad_norm": 0.8661439418792725, "learning_rate": 9.967462039051275e-05, "loss": 4.0854, "step": 1032 }, { "epoch": 0.0847758134768127, "grad_norm": 0.8530150651931763, "learning_rate": 9.967152495198584e-05, "loss": 4.0791, "step": 1034 }, { "epoch": 0.08493978990520112, "grad_norm": 1.156949520111084, "learning_rate": 9.966841490766378e-05, "loss": 4.0719, "step": 1036 }, { "epoch": 0.08510376633358954, "grad_norm": 0.9264504313468933, "learning_rate": 9.966529025846105e-05, "loss": 4.0668, "step": 1038 }, { "epoch": 0.08526774276197796, "grad_norm": 1.0428452491760254, "learning_rate": 9.966215100529645e-05, "loss": 4.0053, "step": 1040 }, { "epoch": 0.08543171919036638, "grad_norm": 0.9271348118782043, "learning_rate": 9.96589971490931e-05, "loss": 4.105, "step": 1042 }, { "epoch": 0.08559569561875481, "grad_norm": 1.1432150602340698, "learning_rate": 9.965582869077836e-05, "loss": 4.0669, "step": 1044 }, { "epoch": 0.08575967204714323, "grad_norm": 0.9777700901031494, "learning_rate": 9.965264563128391e-05, "loss": 4.01, "step": 1046 }, { "epoch": 0.08592364847553165, "grad_norm": 0.8779552578926086, "learning_rate": 9.96494479715457e-05, "loss": 3.9549, "step": 1048 }, { "epoch": 0.08608762490392007, "grad_norm": 0.915309488773346, "learning_rate": 9.964623571250404e-05, "loss": 4.0066, "step": 1050 }, { "epoch": 0.08625160133230848, "grad_norm": 0.753326416015625, "learning_rate": 9.964300885510345e-05, "loss": 4.0328, "step": 1052 }, { "epoch": 0.0864155777606969, "grad_norm": 0.8076086044311523, "learning_rate": 9.96397674002928e-05, "loss": 4.0298, "step": 1054 }, { "epoch": 0.08657955418908532, "grad_norm": 1.0535773038864136, "learning_rate": 9.963651134902524e-05, "loss": 4.0164, "step": 1056 }, { "epoch": 0.08674353061747374, "grad_norm": 0.8676068186759949, "learning_rate": 9.963324070225817e-05, "loss": 4.0412, "step": 1058 }, { "epoch": 0.08690750704586216, "grad_norm": 0.9461095333099365, "learning_rate": 9.962995546095333e-05, "loss": 4.0265, "step": 1060 }, { "epoch": 0.08707148347425057, "grad_norm": 0.9060032367706299, "learning_rate": 9.962665562607676e-05, "loss": 4.0104, "step": 1062 }, { "epoch": 0.08723545990263899, "grad_norm": 0.8914903998374939, "learning_rate": 9.962334119859873e-05, "loss": 3.9873, "step": 1064 }, { "epoch": 0.08739943633102741, "grad_norm": 1.057827353477478, "learning_rate": 9.962001217949389e-05, "loss": 4.0135, "step": 1066 }, { "epoch": 0.08756341275941583, "grad_norm": 0.9309613108634949, "learning_rate": 9.961666856974108e-05, "loss": 4.0184, "step": 1068 }, { "epoch": 0.08772738918780425, "grad_norm": 1.0464098453521729, "learning_rate": 9.961331037032351e-05, "loss": 4.0043, "step": 1070 }, { "epoch": 0.08789136561619267, "grad_norm": 0.7158762812614441, "learning_rate": 9.960993758222863e-05, "loss": 4.0443, "step": 1072 }, { "epoch": 0.0880553420445811, "grad_norm": 0.7665286660194397, "learning_rate": 9.960655020644823e-05, "loss": 3.9872, "step": 1074 }, { "epoch": 0.08821931847296952, "grad_norm": 0.8457959890365601, "learning_rate": 9.960314824397833e-05, "loss": 3.9417, "step": 1076 }, { "epoch": 0.08838329490135793, "grad_norm": 1.152944564819336, "learning_rate": 9.959973169581928e-05, "loss": 4.0407, "step": 1078 }, { "epoch": 0.08854727132974635, "grad_norm": 0.9561640620231628, "learning_rate": 9.959630056297573e-05, "loss": 4.0229, "step": 1080 }, { "epoch": 0.08871124775813477, "grad_norm": 0.9881964921951294, "learning_rate": 9.959285484645658e-05, "loss": 3.9769, "step": 1082 }, { "epoch": 0.08887522418652319, "grad_norm": 0.891594409942627, "learning_rate": 9.9589394547275e-05, "loss": 3.9446, "step": 1084 }, { "epoch": 0.0890392006149116, "grad_norm": 0.7694927453994751, "learning_rate": 9.958591966644853e-05, "loss": 3.968, "step": 1086 }, { "epoch": 0.08920317704330002, "grad_norm": 0.9506424069404602, "learning_rate": 9.958243020499893e-05, "loss": 3.976, "step": 1088 }, { "epoch": 0.08936715347168844, "grad_norm": 0.9964757561683655, "learning_rate": 9.95789261639523e-05, "loss": 4.0114, "step": 1090 }, { "epoch": 0.08953112990007686, "grad_norm": 1.0715919733047485, "learning_rate": 9.957540754433894e-05, "loss": 3.9759, "step": 1092 }, { "epoch": 0.08969510632846528, "grad_norm": 0.9044798612594604, "learning_rate": 9.957187434719352e-05, "loss": 3.9858, "step": 1094 }, { "epoch": 0.0898590827568537, "grad_norm": 0.8711757659912109, "learning_rate": 9.956832657355497e-05, "loss": 3.9678, "step": 1096 }, { "epoch": 0.09002305918524212, "grad_norm": 1.049402117729187, "learning_rate": 9.956476422446652e-05, "loss": 3.8973, "step": 1098 }, { "epoch": 0.09018703561363053, "grad_norm": 0.9575179219245911, "learning_rate": 9.956118730097564e-05, "loss": 3.964, "step": 1100 }, { "epoch": 0.09035101204201897, "grad_norm": 1.0238109827041626, "learning_rate": 9.955759580413412e-05, "loss": 3.9312, "step": 1102 }, { "epoch": 0.09051498847040738, "grad_norm": 0.9079989790916443, "learning_rate": 9.955398973499805e-05, "loss": 3.9918, "step": 1104 }, { "epoch": 0.0906789648987958, "grad_norm": 0.9520390033721924, "learning_rate": 9.955036909462777e-05, "loss": 3.9605, "step": 1106 }, { "epoch": 0.09084294132718422, "grad_norm": 0.9960986971855164, "learning_rate": 9.954673388408793e-05, "loss": 3.9898, "step": 1108 }, { "epoch": 0.09100691775557264, "grad_norm": 0.9239450097084045, "learning_rate": 9.954308410444747e-05, "loss": 3.9124, "step": 1110 }, { "epoch": 0.09117089418396106, "grad_norm": 0.8150608539581299, "learning_rate": 9.953941975677954e-05, "loss": 4.0019, "step": 1112 }, { "epoch": 0.09133487061234948, "grad_norm": 0.8617908358573914, "learning_rate": 9.953574084216171e-05, "loss": 3.9295, "step": 1114 }, { "epoch": 0.0914988470407379, "grad_norm": 0.9470566511154175, "learning_rate": 9.953204736167569e-05, "loss": 3.9361, "step": 1116 }, { "epoch": 0.09166282346912631, "grad_norm": 0.8053050637245178, "learning_rate": 9.95283393164076e-05, "loss": 3.952, "step": 1118 }, { "epoch": 0.09182679989751473, "grad_norm": 0.8299336433410645, "learning_rate": 9.952461670744774e-05, "loss": 3.9024, "step": 1120 }, { "epoch": 0.09199077632590315, "grad_norm": 0.8287034630775452, "learning_rate": 9.952087953589073e-05, "loss": 3.8938, "step": 1122 }, { "epoch": 0.09215475275429157, "grad_norm": 0.8874202370643616, "learning_rate": 9.951712780283552e-05, "loss": 3.9419, "step": 1124 }, { "epoch": 0.09231872918267998, "grad_norm": 0.855707585811615, "learning_rate": 9.951336150938526e-05, "loss": 3.876, "step": 1126 }, { "epoch": 0.0924827056110684, "grad_norm": 0.7967925667762756, "learning_rate": 9.950958065664741e-05, "loss": 3.9378, "step": 1128 }, { "epoch": 0.09264668203945683, "grad_norm": 0.7915927171707153, "learning_rate": 9.950578524573377e-05, "loss": 3.8823, "step": 1130 }, { "epoch": 0.09281065846784525, "grad_norm": 0.8065016865730286, "learning_rate": 9.950197527776033e-05, "loss": 3.9223, "step": 1132 }, { "epoch": 0.09297463489623367, "grad_norm": 0.7818952202796936, "learning_rate": 9.949815075384742e-05, "loss": 3.9015, "step": 1134 }, { "epoch": 0.09313861132462209, "grad_norm": 0.9576020240783691, "learning_rate": 9.949431167511963e-05, "loss": 3.9206, "step": 1136 }, { "epoch": 0.09330258775301051, "grad_norm": 0.8579282760620117, "learning_rate": 9.949045804270581e-05, "loss": 3.9195, "step": 1138 }, { "epoch": 0.09346656418139893, "grad_norm": 0.7089054584503174, "learning_rate": 9.948658985773915e-05, "loss": 3.8824, "step": 1140 }, { "epoch": 0.09363054060978734, "grad_norm": 0.7162330150604248, "learning_rate": 9.948270712135705e-05, "loss": 3.8758, "step": 1142 }, { "epoch": 0.09379451703817576, "grad_norm": 1.0738468170166016, "learning_rate": 9.947880983470124e-05, "loss": 3.8408, "step": 1144 }, { "epoch": 0.09395849346656418, "grad_norm": 0.8277477025985718, "learning_rate": 9.947489799891769e-05, "loss": 3.87, "step": 1146 }, { "epoch": 0.0941224698949526, "grad_norm": 0.7950448989868164, "learning_rate": 9.947097161515668e-05, "loss": 3.9011, "step": 1148 }, { "epoch": 0.09428644632334102, "grad_norm": 0.9803164601325989, "learning_rate": 9.946703068457275e-05, "loss": 3.8423, "step": 1150 }, { "epoch": 0.09445042275172943, "grad_norm": 0.9193939566612244, "learning_rate": 9.946307520832472e-05, "loss": 3.892, "step": 1152 }, { "epoch": 0.09461439918011785, "grad_norm": 0.8781881928443909, "learning_rate": 9.94591051875757e-05, "loss": 3.8793, "step": 1154 }, { "epoch": 0.09477837560850627, "grad_norm": 0.7899143695831299, "learning_rate": 9.945512062349304e-05, "loss": 3.8543, "step": 1156 }, { "epoch": 0.0949423520368947, "grad_norm": 0.9870477914810181, "learning_rate": 9.94511215172484e-05, "loss": 3.8322, "step": 1158 }, { "epoch": 0.09510632846528312, "grad_norm": 1.0156104564666748, "learning_rate": 9.944710787001773e-05, "loss": 3.8877, "step": 1160 }, { "epoch": 0.09527030489367154, "grad_norm": 0.9456477165222168, "learning_rate": 9.94430796829812e-05, "loss": 3.873, "step": 1162 }, { "epoch": 0.09543428132205996, "grad_norm": 0.808631956577301, "learning_rate": 9.943903695732333e-05, "loss": 3.8156, "step": 1164 }, { "epoch": 0.09559825775044838, "grad_norm": 0.9766041040420532, "learning_rate": 9.943497969423283e-05, "loss": 3.8912, "step": 1166 }, { "epoch": 0.0957622341788368, "grad_norm": 1.068718671798706, "learning_rate": 9.943090789490276e-05, "loss": 3.8365, "step": 1168 }, { "epoch": 0.09592621060722521, "grad_norm": 0.8382964134216309, "learning_rate": 9.94268215605304e-05, "loss": 3.8484, "step": 1170 }, { "epoch": 0.09609018703561363, "grad_norm": 0.9153487086296082, "learning_rate": 9.942272069231735e-05, "loss": 3.8154, "step": 1172 }, { "epoch": 0.09625416346400205, "grad_norm": 0.8782140016555786, "learning_rate": 9.941860529146944e-05, "loss": 3.8068, "step": 1174 }, { "epoch": 0.09641813989239047, "grad_norm": 1.0472065210342407, "learning_rate": 9.941447535919681e-05, "loss": 3.887, "step": 1176 }, { "epoch": 0.09658211632077889, "grad_norm": 0.8168578743934631, "learning_rate": 9.941033089671385e-05, "loss": 3.8221, "step": 1178 }, { "epoch": 0.0967460927491673, "grad_norm": 0.8000882863998413, "learning_rate": 9.940617190523923e-05, "loss": 3.8425, "step": 1180 }, { "epoch": 0.09691006917755572, "grad_norm": 0.9577187895774841, "learning_rate": 9.940199838599588e-05, "loss": 3.8679, "step": 1182 }, { "epoch": 0.09707404560594414, "grad_norm": 0.8853087425231934, "learning_rate": 9.939781034021105e-05, "loss": 3.8584, "step": 1184 }, { "epoch": 0.09723802203433256, "grad_norm": 0.811404824256897, "learning_rate": 9.939360776911619e-05, "loss": 3.8383, "step": 1186 }, { "epoch": 0.09740199846272099, "grad_norm": 0.811406672000885, "learning_rate": 9.938939067394706e-05, "loss": 3.7581, "step": 1188 }, { "epoch": 0.09756597489110941, "grad_norm": 0.8467538356781006, "learning_rate": 9.93851590559437e-05, "loss": 3.8692, "step": 1190 }, { "epoch": 0.09772995131949783, "grad_norm": 0.8470588326454163, "learning_rate": 9.938091291635039e-05, "loss": 3.8054, "step": 1192 }, { "epoch": 0.09789392774788624, "grad_norm": 0.8332253694534302, "learning_rate": 9.93766522564157e-05, "loss": 3.8541, "step": 1194 }, { "epoch": 0.09805790417627466, "grad_norm": 0.7565471529960632, "learning_rate": 9.93723770773925e-05, "loss": 3.7615, "step": 1196 }, { "epoch": 0.09822188060466308, "grad_norm": 0.9599220156669617, "learning_rate": 9.936808738053785e-05, "loss": 3.8253, "step": 1198 }, { "epoch": 0.0983858570330515, "grad_norm": 0.7333558201789856, "learning_rate": 9.936378316711317e-05, "loss": 3.8042, "step": 1200 }, { "epoch": 0.09854983346143992, "grad_norm": 0.7844712138175964, "learning_rate": 9.935946443838407e-05, "loss": 3.8675, "step": 1202 }, { "epoch": 0.09871380988982834, "grad_norm": 0.9951752424240112, "learning_rate": 9.935513119562045e-05, "loss": 3.8046, "step": 1204 }, { "epoch": 0.09887778631821675, "grad_norm": 0.8403246998786926, "learning_rate": 9.935078344009654e-05, "loss": 3.8651, "step": 1206 }, { "epoch": 0.09904176274660517, "grad_norm": 0.9809087514877319, "learning_rate": 9.934642117309074e-05, "loss": 3.7967, "step": 1208 }, { "epoch": 0.09920573917499359, "grad_norm": 1.024038553237915, "learning_rate": 9.93420443958858e-05, "loss": 3.798, "step": 1210 }, { "epoch": 0.09936971560338201, "grad_norm": 0.8824047446250916, "learning_rate": 9.933765310976867e-05, "loss": 3.7725, "step": 1212 }, { "epoch": 0.09953369203177043, "grad_norm": 0.7645026445388794, "learning_rate": 9.933324731603063e-05, "loss": 3.7802, "step": 1214 }, { "epoch": 0.09969766846015886, "grad_norm": 0.7119176387786865, "learning_rate": 9.932882701596716e-05, "loss": 3.7905, "step": 1216 }, { "epoch": 0.09986164488854728, "grad_norm": 0.6710290908813477, "learning_rate": 9.932439221087806e-05, "loss": 3.7898, "step": 1218 }, { "epoch": 0.1000256213169357, "grad_norm": 0.8256493210792542, "learning_rate": 9.931994290206738e-05, "loss": 3.7857, "step": 1220 }, { "epoch": 0.10018959774532411, "grad_norm": 0.8101679086685181, "learning_rate": 9.931547909084339e-05, "loss": 3.806, "step": 1222 }, { "epoch": 0.10035357417371253, "grad_norm": 0.7876362204551697, "learning_rate": 9.931100077851871e-05, "loss": 3.7395, "step": 1224 }, { "epoch": 0.10051755060210095, "grad_norm": 0.7746016979217529, "learning_rate": 9.930650796641017e-05, "loss": 3.7961, "step": 1226 }, { "epoch": 0.10068152703048937, "grad_norm": 0.6673750877380371, "learning_rate": 9.930200065583883e-05, "loss": 3.7708, "step": 1228 }, { "epoch": 0.10084550345887779, "grad_norm": 0.796775221824646, "learning_rate": 9.92974788481301e-05, "loss": 3.8279, "step": 1230 }, { "epoch": 0.1010094798872662, "grad_norm": 0.725659191608429, "learning_rate": 9.929294254461359e-05, "loss": 3.7343, "step": 1232 }, { "epoch": 0.10117345631565462, "grad_norm": 0.7385995984077454, "learning_rate": 9.928839174662317e-05, "loss": 3.8351, "step": 1234 }, { "epoch": 0.10133743274404304, "grad_norm": 0.7573429346084595, "learning_rate": 9.928382645549703e-05, "loss": 3.8307, "step": 1236 }, { "epoch": 0.10150140917243146, "grad_norm": 0.7082958221435547, "learning_rate": 9.927924667257756e-05, "loss": 3.812, "step": 1238 }, { "epoch": 0.10166538560081988, "grad_norm": 0.819148063659668, "learning_rate": 9.927465239921143e-05, "loss": 3.8176, "step": 1240 }, { "epoch": 0.1018293620292083, "grad_norm": 0.8235107660293579, "learning_rate": 9.927004363674959e-05, "loss": 3.766, "step": 1242 }, { "epoch": 0.10199333845759673, "grad_norm": 0.8283859491348267, "learning_rate": 9.926542038654722e-05, "loss": 3.7771, "step": 1244 }, { "epoch": 0.10215731488598515, "grad_norm": 0.739612340927124, "learning_rate": 9.92607826499638e-05, "loss": 3.7786, "step": 1246 }, { "epoch": 0.10232129131437356, "grad_norm": 0.6946161985397339, "learning_rate": 9.925613042836302e-05, "loss": 3.7143, "step": 1248 }, { "epoch": 0.10248526774276198, "grad_norm": 0.8461303114891052, "learning_rate": 9.925146372311288e-05, "loss": 3.7804, "step": 1250 }, { "epoch": 0.1026492441711504, "grad_norm": 0.8450109958648682, "learning_rate": 9.924678253558557e-05, "loss": 3.7209, "step": 1252 }, { "epoch": 0.10281322059953882, "grad_norm": 0.7622053623199463, "learning_rate": 9.924208686715763e-05, "loss": 3.7491, "step": 1254 }, { "epoch": 0.10297719702792724, "grad_norm": 0.8452515602111816, "learning_rate": 9.923737671920978e-05, "loss": 3.7461, "step": 1256 }, { "epoch": 0.10314117345631565, "grad_norm": 0.7765418887138367, "learning_rate": 9.923265209312704e-05, "loss": 3.7491, "step": 1258 }, { "epoch": 0.10330514988470407, "grad_norm": 0.8474555015563965, "learning_rate": 9.922791299029868e-05, "loss": 3.7277, "step": 1260 }, { "epoch": 0.10346912631309249, "grad_norm": 0.9031925201416016, "learning_rate": 9.922315941211823e-05, "loss": 3.7966, "step": 1262 }, { "epoch": 0.10363310274148091, "grad_norm": 0.7896429300308228, "learning_rate": 9.921839135998343e-05, "loss": 3.7119, "step": 1264 }, { "epoch": 0.10379707916986933, "grad_norm": 0.8127464056015015, "learning_rate": 9.921360883529636e-05, "loss": 3.7126, "step": 1266 }, { "epoch": 0.10396105559825775, "grad_norm": 0.9778748750686646, "learning_rate": 9.920881183946328e-05, "loss": 3.786, "step": 1268 }, { "epoch": 0.10412503202664616, "grad_norm": 0.9540830254554749, "learning_rate": 9.920400037389474e-05, "loss": 3.7243, "step": 1270 }, { "epoch": 0.10428900845503458, "grad_norm": 1.0872488021850586, "learning_rate": 9.919917444000555e-05, "loss": 3.7761, "step": 1272 }, { "epoch": 0.10445298488342301, "grad_norm": 0.8405986428260803, "learning_rate": 9.919433403921476e-05, "loss": 3.7128, "step": 1274 }, { "epoch": 0.10461696131181143, "grad_norm": 0.9105572700500488, "learning_rate": 9.918947917294568e-05, "loss": 3.7153, "step": 1276 }, { "epoch": 0.10478093774019985, "grad_norm": 0.7782844305038452, "learning_rate": 9.918460984262588e-05, "loss": 3.6612, "step": 1278 }, { "epoch": 0.10494491416858827, "grad_norm": 0.746457576751709, "learning_rate": 9.917972604968715e-05, "loss": 3.7196, "step": 1280 }, { "epoch": 0.10510889059697669, "grad_norm": 0.856855034828186, "learning_rate": 9.917482779556557e-05, "loss": 3.6806, "step": 1282 }, { "epoch": 0.1052728670253651, "grad_norm": 0.6793504953384399, "learning_rate": 9.916991508170148e-05, "loss": 3.706, "step": 1284 }, { "epoch": 0.10543684345375352, "grad_norm": 0.9537250995635986, "learning_rate": 9.916498790953943e-05, "loss": 3.7844, "step": 1286 }, { "epoch": 0.10560081988214194, "grad_norm": 0.6688050627708435, "learning_rate": 9.916004628052824e-05, "loss": 3.7161, "step": 1288 }, { "epoch": 0.10576479631053036, "grad_norm": 0.7840797305107117, "learning_rate": 9.9155090196121e-05, "loss": 3.7309, "step": 1290 }, { "epoch": 0.10592877273891878, "grad_norm": 0.7196126580238342, "learning_rate": 9.9150119657775e-05, "loss": 3.7571, "step": 1292 }, { "epoch": 0.1060927491673072, "grad_norm": 0.8072746396064758, "learning_rate": 9.914513466695188e-05, "loss": 3.713, "step": 1294 }, { "epoch": 0.10625672559569561, "grad_norm": 0.7342846393585205, "learning_rate": 9.914013522511743e-05, "loss": 3.7195, "step": 1296 }, { "epoch": 0.10642070202408403, "grad_norm": 0.7047367691993713, "learning_rate": 9.91351213337417e-05, "loss": 3.7306, "step": 1298 }, { "epoch": 0.10658467845247245, "grad_norm": 0.6987332105636597, "learning_rate": 9.913009299429904e-05, "loss": 3.7034, "step": 1300 }, { "epoch": 0.10674865488086088, "grad_norm": 0.6787108778953552, "learning_rate": 9.912505020826801e-05, "loss": 3.7059, "step": 1302 }, { "epoch": 0.1069126313092493, "grad_norm": 0.6666189432144165, "learning_rate": 9.911999297713145e-05, "loss": 3.7006, "step": 1304 }, { "epoch": 0.10707660773763772, "grad_norm": 0.6904592514038086, "learning_rate": 9.91149213023764e-05, "loss": 3.762, "step": 1306 }, { "epoch": 0.10724058416602614, "grad_norm": 0.8941283822059631, "learning_rate": 9.91098351854942e-05, "loss": 3.6793, "step": 1308 }, { "epoch": 0.10740456059441456, "grad_norm": 0.7099062204360962, "learning_rate": 9.910473462798039e-05, "loss": 3.6232, "step": 1310 }, { "epoch": 0.10756853702280297, "grad_norm": 0.8660025596618652, "learning_rate": 9.909961963133479e-05, "loss": 3.7272, "step": 1312 }, { "epoch": 0.10773251345119139, "grad_norm": 0.7533067464828491, "learning_rate": 9.909449019706145e-05, "loss": 3.7422, "step": 1314 }, { "epoch": 0.10789648987957981, "grad_norm": 0.7809666991233826, "learning_rate": 9.908934632666864e-05, "loss": 3.6608, "step": 1316 }, { "epoch": 0.10806046630796823, "grad_norm": 0.7331179976463318, "learning_rate": 9.908418802166894e-05, "loss": 3.6718, "step": 1318 }, { "epoch": 0.10822444273635665, "grad_norm": 0.7965632081031799, "learning_rate": 9.907901528357915e-05, "loss": 3.7616, "step": 1320 }, { "epoch": 0.10838841916474506, "grad_norm": 0.7728394269943237, "learning_rate": 9.907382811392026e-05, "loss": 3.6811, "step": 1322 }, { "epoch": 0.10855239559313348, "grad_norm": 0.7595298290252686, "learning_rate": 9.906862651421756e-05, "loss": 3.7385, "step": 1324 }, { "epoch": 0.1087163720215219, "grad_norm": 0.8519642353057861, "learning_rate": 9.906341048600056e-05, "loss": 3.7245, "step": 1326 }, { "epoch": 0.10888034844991032, "grad_norm": 0.7890890836715698, "learning_rate": 9.905818003080305e-05, "loss": 3.7362, "step": 1328 }, { "epoch": 0.10904432487829875, "grad_norm": 0.784578800201416, "learning_rate": 9.9052935150163e-05, "loss": 3.6611, "step": 1330 }, { "epoch": 0.10920830130668717, "grad_norm": 0.8048536777496338, "learning_rate": 9.904767584562267e-05, "loss": 3.7034, "step": 1332 }, { "epoch": 0.10937227773507559, "grad_norm": 0.7695967555046082, "learning_rate": 9.904240211872855e-05, "loss": 3.6495, "step": 1334 }, { "epoch": 0.109536254163464, "grad_norm": 0.6730368733406067, "learning_rate": 9.903711397103136e-05, "loss": 3.6522, "step": 1336 }, { "epoch": 0.10970023059185242, "grad_norm": 0.7607198357582092, "learning_rate": 9.903181140408609e-05, "loss": 3.6837, "step": 1338 }, { "epoch": 0.10986420702024084, "grad_norm": 0.7482820749282837, "learning_rate": 9.902649441945188e-05, "loss": 3.6851, "step": 1340 }, { "epoch": 0.11002818344862926, "grad_norm": 0.7840356230735779, "learning_rate": 9.902116301869227e-05, "loss": 3.6291, "step": 1342 }, { "epoch": 0.11019215987701768, "grad_norm": 0.6231241822242737, "learning_rate": 9.901581720337488e-05, "loss": 3.6361, "step": 1344 }, { "epoch": 0.1103561363054061, "grad_norm": 0.7832990884780884, "learning_rate": 9.901045697507165e-05, "loss": 3.6948, "step": 1346 }, { "epoch": 0.11052011273379451, "grad_norm": 0.7512111067771912, "learning_rate": 9.900508233535875e-05, "loss": 3.6697, "step": 1348 }, { "epoch": 0.11068408916218293, "grad_norm": 0.7433375716209412, "learning_rate": 9.899969328581659e-05, "loss": 3.7029, "step": 1350 }, { "epoch": 0.11084806559057135, "grad_norm": 0.7757459878921509, "learning_rate": 9.899428982802979e-05, "loss": 3.6965, "step": 1352 }, { "epoch": 0.11101204201895977, "grad_norm": 0.7528740763664246, "learning_rate": 9.898887196358721e-05, "loss": 3.6376, "step": 1354 }, { "epoch": 0.11117601844734819, "grad_norm": 0.7715753316879272, "learning_rate": 9.898343969408199e-05, "loss": 3.6403, "step": 1356 }, { "epoch": 0.11133999487573662, "grad_norm": 0.8425229787826538, "learning_rate": 9.897799302111146e-05, "loss": 3.6655, "step": 1358 }, { "epoch": 0.11150397130412504, "grad_norm": 0.8818288445472717, "learning_rate": 9.897253194627722e-05, "loss": 3.6635, "step": 1360 }, { "epoch": 0.11166794773251346, "grad_norm": 0.9619779586791992, "learning_rate": 9.896705647118504e-05, "loss": 3.6766, "step": 1362 }, { "epoch": 0.11183192416090187, "grad_norm": 0.9253937005996704, "learning_rate": 9.896156659744504e-05, "loss": 3.6859, "step": 1364 }, { "epoch": 0.11199590058929029, "grad_norm": 0.9797042608261108, "learning_rate": 9.895606232667144e-05, "loss": 3.649, "step": 1366 }, { "epoch": 0.11215987701767871, "grad_norm": 0.7821505665779114, "learning_rate": 9.895054366048281e-05, "loss": 3.7164, "step": 1368 }, { "epoch": 0.11232385344606713, "grad_norm": 0.8402411937713623, "learning_rate": 9.894501060050186e-05, "loss": 3.7369, "step": 1370 }, { "epoch": 0.11248782987445555, "grad_norm": 0.9361245036125183, "learning_rate": 9.893946314835559e-05, "loss": 3.6806, "step": 1372 }, { "epoch": 0.11265180630284397, "grad_norm": 0.9412410855293274, "learning_rate": 9.893390130567523e-05, "loss": 3.676, "step": 1374 }, { "epoch": 0.11281578273123238, "grad_norm": 0.8595388531684875, "learning_rate": 9.89283250740962e-05, "loss": 3.6322, "step": 1376 }, { "epoch": 0.1129797591596208, "grad_norm": 0.7236664295196533, "learning_rate": 9.892273445525817e-05, "loss": 3.6686, "step": 1378 }, { "epoch": 0.11314373558800922, "grad_norm": 0.659474790096283, "learning_rate": 9.891712945080508e-05, "loss": 3.6125, "step": 1380 }, { "epoch": 0.11330771201639764, "grad_norm": 0.7330523133277893, "learning_rate": 9.891151006238507e-05, "loss": 3.6654, "step": 1382 }, { "epoch": 0.11347168844478606, "grad_norm": 0.8555669784545898, "learning_rate": 9.890587629165049e-05, "loss": 3.5857, "step": 1384 }, { "epoch": 0.11363566487317447, "grad_norm": 0.9295856952667236, "learning_rate": 9.890022814025792e-05, "loss": 3.6874, "step": 1386 }, { "epoch": 0.1137996413015629, "grad_norm": 0.63069087266922, "learning_rate": 9.889456560986823e-05, "loss": 3.6358, "step": 1388 }, { "epoch": 0.11396361772995132, "grad_norm": 0.7602105140686035, "learning_rate": 9.888888870214643e-05, "loss": 3.6113, "step": 1390 }, { "epoch": 0.11412759415833974, "grad_norm": 0.7809365391731262, "learning_rate": 9.888319741876185e-05, "loss": 3.6585, "step": 1392 }, { "epoch": 0.11429157058672816, "grad_norm": 0.6976439952850342, "learning_rate": 9.887749176138794e-05, "loss": 3.695, "step": 1394 }, { "epoch": 0.11445554701511658, "grad_norm": 0.775341272354126, "learning_rate": 9.887177173170248e-05, "loss": 3.6783, "step": 1396 }, { "epoch": 0.114619523443505, "grad_norm": 0.7208604216575623, "learning_rate": 9.886603733138742e-05, "loss": 3.692, "step": 1398 }, { "epoch": 0.11478349987189342, "grad_norm": 0.7146006226539612, "learning_rate": 9.886028856212893e-05, "loss": 3.6103, "step": 1400 }, { "epoch": 0.11494747630028183, "grad_norm": 0.6759282946586609, "learning_rate": 9.885452542561744e-05, "loss": 3.7273, "step": 1402 }, { "epoch": 0.11511145272867025, "grad_norm": 0.680182158946991, "learning_rate": 9.884874792354758e-05, "loss": 3.6314, "step": 1404 }, { "epoch": 0.11527542915705867, "grad_norm": 0.8232313394546509, "learning_rate": 9.884295605761822e-05, "loss": 3.6098, "step": 1406 }, { "epoch": 0.11543940558544709, "grad_norm": 0.6593087911605835, "learning_rate": 9.883714982953244e-05, "loss": 3.5716, "step": 1408 }, { "epoch": 0.1156033820138355, "grad_norm": 0.6459859013557434, "learning_rate": 9.883132924099753e-05, "loss": 3.6418, "step": 1410 }, { "epoch": 0.11576735844222392, "grad_norm": 0.7395800352096558, "learning_rate": 9.882549429372505e-05, "loss": 3.6148, "step": 1412 }, { "epoch": 0.11593133487061234, "grad_norm": 0.6539946794509888, "learning_rate": 9.881964498943074e-05, "loss": 3.6126, "step": 1414 }, { "epoch": 0.11609531129900078, "grad_norm": 0.7250804901123047, "learning_rate": 9.881378132983456e-05, "loss": 3.5968, "step": 1416 }, { "epoch": 0.1162592877273892, "grad_norm": 0.6665759086608887, "learning_rate": 9.880790331666073e-05, "loss": 3.5718, "step": 1418 }, { "epoch": 0.11642326415577761, "grad_norm": 0.6589260697364807, "learning_rate": 9.880201095163765e-05, "loss": 3.6868, "step": 1420 }, { "epoch": 0.11658724058416603, "grad_norm": 0.7453758716583252, "learning_rate": 9.879610423649795e-05, "loss": 3.5785, "step": 1422 }, { "epoch": 0.11675121701255445, "grad_norm": 0.760637640953064, "learning_rate": 9.879018317297852e-05, "loss": 3.6157, "step": 1424 }, { "epoch": 0.11691519344094287, "grad_norm": 0.7891120910644531, "learning_rate": 9.878424776282039e-05, "loss": 3.6347, "step": 1426 }, { "epoch": 0.11707916986933128, "grad_norm": 0.8159520626068115, "learning_rate": 9.877829800776887e-05, "loss": 3.5827, "step": 1428 }, { "epoch": 0.1172431462977197, "grad_norm": 0.7945658564567566, "learning_rate": 9.877233390957348e-05, "loss": 3.6225, "step": 1430 }, { "epoch": 0.11740712272610812, "grad_norm": 0.6557226777076721, "learning_rate": 9.876635546998795e-05, "loss": 3.5789, "step": 1432 }, { "epoch": 0.11757109915449654, "grad_norm": 0.7214797735214233, "learning_rate": 9.876036269077021e-05, "loss": 3.6572, "step": 1434 }, { "epoch": 0.11773507558288496, "grad_norm": 0.6005678772926331, "learning_rate": 9.875435557368245e-05, "loss": 3.5788, "step": 1436 }, { "epoch": 0.11789905201127338, "grad_norm": 0.7711865901947021, "learning_rate": 9.874833412049102e-05, "loss": 3.6336, "step": 1438 }, { "epoch": 0.1180630284396618, "grad_norm": 0.6728807091712952, "learning_rate": 9.874229833296654e-05, "loss": 3.5508, "step": 1440 }, { "epoch": 0.11822700486805021, "grad_norm": 0.7253196239471436, "learning_rate": 9.873624821288378e-05, "loss": 3.5958, "step": 1442 }, { "epoch": 0.11839098129643864, "grad_norm": 0.7579479813575745, "learning_rate": 9.87301837620218e-05, "loss": 3.5491, "step": 1444 }, { "epoch": 0.11855495772482706, "grad_norm": 0.8953156471252441, "learning_rate": 9.872410498216382e-05, "loss": 3.5745, "step": 1446 }, { "epoch": 0.11871893415321548, "grad_norm": 0.7993676662445068, "learning_rate": 9.87180118750973e-05, "loss": 3.6284, "step": 1448 }, { "epoch": 0.1188829105816039, "grad_norm": 0.7717795968055725, "learning_rate": 9.871190444261391e-05, "loss": 3.6401, "step": 1450 }, { "epoch": 0.11904688700999232, "grad_norm": 0.7042213082313538, "learning_rate": 9.870578268650951e-05, "loss": 3.5621, "step": 1452 }, { "epoch": 0.11921086343838073, "grad_norm": 0.7422952651977539, "learning_rate": 9.86996466085842e-05, "loss": 3.6372, "step": 1454 }, { "epoch": 0.11937483986676915, "grad_norm": 0.6974375247955322, "learning_rate": 9.869349621064228e-05, "loss": 3.6112, "step": 1456 }, { "epoch": 0.11953881629515757, "grad_norm": 0.6447159051895142, "learning_rate": 9.868733149449224e-05, "loss": 3.6014, "step": 1458 }, { "epoch": 0.11970279272354599, "grad_norm": 0.7838313579559326, "learning_rate": 9.868115246194682e-05, "loss": 3.6063, "step": 1460 }, { "epoch": 0.11986676915193441, "grad_norm": 0.7431493997573853, "learning_rate": 9.867495911482295e-05, "loss": 3.6058, "step": 1462 }, { "epoch": 0.12003074558032283, "grad_norm": 0.7936450242996216, "learning_rate": 9.866875145494175e-05, "loss": 3.5997, "step": 1464 }, { "epoch": 0.12019472200871124, "grad_norm": 0.7873966097831726, "learning_rate": 9.866252948412859e-05, "loss": 3.5496, "step": 1466 }, { "epoch": 0.12035869843709966, "grad_norm": 0.6305302381515503, "learning_rate": 9.865629320421301e-05, "loss": 3.5844, "step": 1468 }, { "epoch": 0.12052267486548808, "grad_norm": 0.8464959263801575, "learning_rate": 9.865004261702879e-05, "loss": 3.5736, "step": 1470 }, { "epoch": 0.1206866512938765, "grad_norm": 0.5869840383529663, "learning_rate": 9.86437777244139e-05, "loss": 3.5445, "step": 1472 }, { "epoch": 0.12085062772226493, "grad_norm": 0.680647611618042, "learning_rate": 9.863749852821049e-05, "loss": 3.5203, "step": 1474 }, { "epoch": 0.12101460415065335, "grad_norm": 0.61550372838974, "learning_rate": 9.863120503026497e-05, "loss": 3.5638, "step": 1476 }, { "epoch": 0.12117858057904177, "grad_norm": 0.7552183270454407, "learning_rate": 9.862489723242792e-05, "loss": 3.5792, "step": 1478 }, { "epoch": 0.12134255700743019, "grad_norm": 0.6832086443901062, "learning_rate": 9.861857513655413e-05, "loss": 3.5966, "step": 1480 }, { "epoch": 0.1215065334358186, "grad_norm": 0.7540295124053955, "learning_rate": 9.86122387445026e-05, "loss": 3.6294, "step": 1482 }, { "epoch": 0.12167050986420702, "grad_norm": 0.8079466223716736, "learning_rate": 9.860588805813653e-05, "loss": 3.5425, "step": 1484 }, { "epoch": 0.12183448629259544, "grad_norm": 0.6720893979072571, "learning_rate": 9.859952307932334e-05, "loss": 3.5946, "step": 1486 }, { "epoch": 0.12199846272098386, "grad_norm": 0.7064858078956604, "learning_rate": 9.85931438099346e-05, "loss": 3.5714, "step": 1488 }, { "epoch": 0.12216243914937228, "grad_norm": 0.7637129426002502, "learning_rate": 9.858675025184616e-05, "loss": 3.5547, "step": 1490 }, { "epoch": 0.1223264155777607, "grad_norm": 0.7691531181335449, "learning_rate": 9.8580342406938e-05, "loss": 3.5592, "step": 1492 }, { "epoch": 0.12249039200614911, "grad_norm": 0.8551957607269287, "learning_rate": 9.857392027709435e-05, "loss": 3.5714, "step": 1494 }, { "epoch": 0.12265436843453753, "grad_norm": 0.792309582233429, "learning_rate": 9.856748386420362e-05, "loss": 3.6033, "step": 1496 }, { "epoch": 0.12281834486292595, "grad_norm": 0.6698387861251831, "learning_rate": 9.856103317015841e-05, "loss": 3.5304, "step": 1498 }, { "epoch": 0.12298232129131437, "grad_norm": 0.7881389260292053, "learning_rate": 9.855456819685555e-05, "loss": 3.551, "step": 1500 }, { "epoch": 0.1231462977197028, "grad_norm": 0.7923277616500854, "learning_rate": 9.854808894619602e-05, "loss": 3.5718, "step": 1502 }, { "epoch": 0.12331027414809122, "grad_norm": 0.8148519992828369, "learning_rate": 9.854159542008508e-05, "loss": 3.5866, "step": 1504 }, { "epoch": 0.12347425057647964, "grad_norm": 0.7714492082595825, "learning_rate": 9.853508762043209e-05, "loss": 3.6145, "step": 1506 }, { "epoch": 0.12363822700486805, "grad_norm": 0.8480067253112793, "learning_rate": 9.852856554915066e-05, "loss": 3.5683, "step": 1508 }, { "epoch": 0.12380220343325647, "grad_norm": 0.6881988644599915, "learning_rate": 9.85220292081586e-05, "loss": 3.5086, "step": 1510 }, { "epoch": 0.12396617986164489, "grad_norm": 0.7517293095588684, "learning_rate": 9.85154785993779e-05, "loss": 3.582, "step": 1512 }, { "epoch": 0.12413015629003331, "grad_norm": 1.0973633527755737, "learning_rate": 9.850891372473478e-05, "loss": 3.5674, "step": 1514 }, { "epoch": 0.12429413271842173, "grad_norm": 0.837307870388031, "learning_rate": 9.850233458615957e-05, "loss": 3.6073, "step": 1516 }, { "epoch": 0.12445810914681014, "grad_norm": 0.9713445901870728, "learning_rate": 9.84957411855869e-05, "loss": 3.5698, "step": 1518 }, { "epoch": 0.12462208557519856, "grad_norm": 0.9163139462471008, "learning_rate": 9.848913352495551e-05, "loss": 3.6004, "step": 1520 }, { "epoch": 0.12478606200358698, "grad_norm": 0.7779731154441833, "learning_rate": 9.848251160620839e-05, "loss": 3.6013, "step": 1522 }, { "epoch": 0.1249500384319754, "grad_norm": 0.7217362523078918, "learning_rate": 9.847587543129269e-05, "loss": 3.5181, "step": 1524 }, { "epoch": 0.12511401486036383, "grad_norm": 0.7261420488357544, "learning_rate": 9.846922500215976e-05, "loss": 3.5826, "step": 1526 }, { "epoch": 0.12527799128875225, "grad_norm": 0.6862401962280273, "learning_rate": 9.846256032076515e-05, "loss": 3.4789, "step": 1528 }, { "epoch": 0.12544196771714067, "grad_norm": 0.7996855974197388, "learning_rate": 9.845588138906859e-05, "loss": 3.5581, "step": 1530 }, { "epoch": 0.12560594414552909, "grad_norm": 0.6853513717651367, "learning_rate": 9.8449188209034e-05, "loss": 3.5594, "step": 1532 }, { "epoch": 0.1257699205739175, "grad_norm": 0.7655189037322998, "learning_rate": 9.84424807826295e-05, "loss": 3.5514, "step": 1534 }, { "epoch": 0.12593389700230592, "grad_norm": 0.6501437425613403, "learning_rate": 9.84357591118274e-05, "loss": 3.5318, "step": 1536 }, { "epoch": 0.12609787343069434, "grad_norm": 0.7742712497711182, "learning_rate": 9.842902319860417e-05, "loss": 3.505, "step": 1538 }, { "epoch": 0.12626184985908276, "grad_norm": 0.632228672504425, "learning_rate": 9.842227304494051e-05, "loss": 3.6209, "step": 1540 }, { "epoch": 0.12642582628747118, "grad_norm": 0.5981665253639221, "learning_rate": 9.841550865282128e-05, "loss": 3.5373, "step": 1542 }, { "epoch": 0.1265898027158596, "grad_norm": 0.6225053071975708, "learning_rate": 9.840873002423552e-05, "loss": 3.5161, "step": 1544 }, { "epoch": 0.126753779144248, "grad_norm": 0.6428139209747314, "learning_rate": 9.84019371611765e-05, "loss": 3.5194, "step": 1546 }, { "epoch": 0.12691775557263643, "grad_norm": 0.6379141807556152, "learning_rate": 9.83951300656416e-05, "loss": 3.5363, "step": 1548 }, { "epoch": 0.12708173200102485, "grad_norm": 0.6773483753204346, "learning_rate": 9.838830873963249e-05, "loss": 3.5162, "step": 1550 }, { "epoch": 0.12724570842941327, "grad_norm": 0.6917803883552551, "learning_rate": 9.838147318515491e-05, "loss": 3.5388, "step": 1552 }, { "epoch": 0.12740968485780169, "grad_norm": 0.6757781505584717, "learning_rate": 9.837462340421886e-05, "loss": 3.5399, "step": 1554 }, { "epoch": 0.1275736612861901, "grad_norm": 0.6116536855697632, "learning_rate": 9.836775939883852e-05, "loss": 3.5487, "step": 1556 }, { "epoch": 0.12773763771457852, "grad_norm": 0.6963580250740051, "learning_rate": 9.836088117103222e-05, "loss": 3.5252, "step": 1558 }, { "epoch": 0.12790161414296694, "grad_norm": 0.7524001002311707, "learning_rate": 9.835398872282247e-05, "loss": 3.507, "step": 1560 }, { "epoch": 0.12806559057135536, "grad_norm": 0.6589372754096985, "learning_rate": 9.834708205623599e-05, "loss": 3.5236, "step": 1562 }, { "epoch": 0.12822956699974378, "grad_norm": 0.6432667970657349, "learning_rate": 9.834016117330369e-05, "loss": 3.5065, "step": 1564 }, { "epoch": 0.1283935434281322, "grad_norm": 0.6807281970977783, "learning_rate": 9.833322607606062e-05, "loss": 3.494, "step": 1566 }, { "epoch": 0.1285575198565206, "grad_norm": 0.6932308077812195, "learning_rate": 9.832627676654601e-05, "loss": 3.5196, "step": 1568 }, { "epoch": 0.12872149628490903, "grad_norm": 0.6904752254486084, "learning_rate": 9.831931324680333e-05, "loss": 3.5883, "step": 1570 }, { "epoch": 0.12888547271329745, "grad_norm": 0.7266760468482971, "learning_rate": 9.831233551888015e-05, "loss": 3.5637, "step": 1572 }, { "epoch": 0.1290494491416859, "grad_norm": 0.6184994578361511, "learning_rate": 9.830534358482827e-05, "loss": 3.512, "step": 1574 }, { "epoch": 0.12921342557007431, "grad_norm": 0.6875196695327759, "learning_rate": 9.829833744670366e-05, "loss": 3.5822, "step": 1576 }, { "epoch": 0.12937740199846273, "grad_norm": 0.6475251317024231, "learning_rate": 9.829131710656645e-05, "loss": 3.4807, "step": 1578 }, { "epoch": 0.12954137842685115, "grad_norm": 0.6316696405410767, "learning_rate": 9.828428256648095e-05, "loss": 3.5333, "step": 1580 }, { "epoch": 0.12970535485523957, "grad_norm": 0.6235971450805664, "learning_rate": 9.827723382851565e-05, "loss": 3.4228, "step": 1582 }, { "epoch": 0.129869331283628, "grad_norm": 0.7109240293502808, "learning_rate": 9.827017089474324e-05, "loss": 3.4983, "step": 1584 }, { "epoch": 0.1300333077120164, "grad_norm": 0.7131465673446655, "learning_rate": 9.826309376724052e-05, "loss": 3.5094, "step": 1586 }, { "epoch": 0.13019728414040482, "grad_norm": 0.6761036515235901, "learning_rate": 9.825600244808853e-05, "loss": 3.5461, "step": 1588 }, { "epoch": 0.13036126056879324, "grad_norm": 0.6749283671379089, "learning_rate": 9.824889693937245e-05, "loss": 3.5214, "step": 1590 }, { "epoch": 0.13052523699718166, "grad_norm": 0.952836811542511, "learning_rate": 9.824177724318162e-05, "loss": 3.5135, "step": 1592 }, { "epoch": 0.13068921342557008, "grad_norm": 0.7100101113319397, "learning_rate": 9.823464336160959e-05, "loss": 3.5523, "step": 1594 }, { "epoch": 0.1308531898539585, "grad_norm": 0.7093891501426697, "learning_rate": 9.822749529675406e-05, "loss": 3.5338, "step": 1596 }, { "epoch": 0.13101716628234691, "grad_norm": 0.716699481010437, "learning_rate": 9.822033305071689e-05, "loss": 3.5206, "step": 1598 }, { "epoch": 0.13118114271073533, "grad_norm": 0.6527066230773926, "learning_rate": 9.821315662560415e-05, "loss": 3.5264, "step": 1600 }, { "epoch": 0.13134511913912375, "grad_norm": 0.6964712738990784, "learning_rate": 9.820596602352601e-05, "loss": 3.4957, "step": 1602 }, { "epoch": 0.13150909556751217, "grad_norm": 0.7309548854827881, "learning_rate": 9.819876124659687e-05, "loss": 3.5073, "step": 1604 }, { "epoch": 0.1316730719959006, "grad_norm": 0.7031247019767761, "learning_rate": 9.819154229693529e-05, "loss": 3.4814, "step": 1606 }, { "epoch": 0.131837048424289, "grad_norm": 0.6443150639533997, "learning_rate": 9.818430917666397e-05, "loss": 3.4822, "step": 1608 }, { "epoch": 0.13200102485267742, "grad_norm": 0.6941884160041809, "learning_rate": 9.817706188790979e-05, "loss": 3.4766, "step": 1610 }, { "epoch": 0.13216500128106584, "grad_norm": 0.6277130246162415, "learning_rate": 9.81698004328038e-05, "loss": 3.4984, "step": 1612 }, { "epoch": 0.13232897770945426, "grad_norm": 0.7393566966056824, "learning_rate": 9.816252481348122e-05, "loss": 3.4467, "step": 1614 }, { "epoch": 0.13249295413784268, "grad_norm": 0.8626520037651062, "learning_rate": 9.815523503208141e-05, "loss": 3.5066, "step": 1616 }, { "epoch": 0.1326569305662311, "grad_norm": 0.7005507946014404, "learning_rate": 9.814793109074795e-05, "loss": 3.5249, "step": 1618 }, { "epoch": 0.13282090699461951, "grad_norm": 0.695397675037384, "learning_rate": 9.814061299162853e-05, "loss": 3.5428, "step": 1620 }, { "epoch": 0.13298488342300793, "grad_norm": 0.7181118130683899, "learning_rate": 9.8133280736875e-05, "loss": 3.4943, "step": 1622 }, { "epoch": 0.13314885985139635, "grad_norm": 0.7019610404968262, "learning_rate": 9.812593432864343e-05, "loss": 3.4751, "step": 1624 }, { "epoch": 0.13331283627978477, "grad_norm": 0.629170298576355, "learning_rate": 9.811857376909398e-05, "loss": 3.4791, "step": 1626 }, { "epoch": 0.1334768127081732, "grad_norm": 0.7574000358581543, "learning_rate": 9.8111199060391e-05, "loss": 3.5068, "step": 1628 }, { "epoch": 0.13364078913656163, "grad_norm": 0.6433237195014954, "learning_rate": 9.810381020470303e-05, "loss": 3.5356, "step": 1630 }, { "epoch": 0.13380476556495005, "grad_norm": 0.8434267044067383, "learning_rate": 9.809640720420275e-05, "loss": 3.5549, "step": 1632 }, { "epoch": 0.13396874199333847, "grad_norm": 0.7503165602684021, "learning_rate": 9.808899006106697e-05, "loss": 3.4256, "step": 1634 }, { "epoch": 0.1341327184217269, "grad_norm": 0.6501772403717041, "learning_rate": 9.808155877747671e-05, "loss": 3.4676, "step": 1636 }, { "epoch": 0.1342966948501153, "grad_norm": 0.6620165109634399, "learning_rate": 9.80741133556171e-05, "loss": 3.4372, "step": 1638 }, { "epoch": 0.13446067127850372, "grad_norm": 0.7528221607208252, "learning_rate": 9.806665379767746e-05, "loss": 3.4309, "step": 1640 }, { "epoch": 0.13462464770689214, "grad_norm": 0.6489667892456055, "learning_rate": 9.805918010585124e-05, "loss": 3.4438, "step": 1642 }, { "epoch": 0.13478862413528056, "grad_norm": 0.5966881513595581, "learning_rate": 9.805169228233608e-05, "loss": 3.4465, "step": 1644 }, { "epoch": 0.13495260056366898, "grad_norm": 0.6900391578674316, "learning_rate": 9.804419032933377e-05, "loss": 3.5272, "step": 1646 }, { "epoch": 0.1351165769920574, "grad_norm": 0.7607645988464355, "learning_rate": 9.80366742490502e-05, "loss": 3.4956, "step": 1648 }, { "epoch": 0.13528055342044581, "grad_norm": 0.6069225668907166, "learning_rate": 9.802914404369548e-05, "loss": 3.4641, "step": 1650 }, { "epoch": 0.13544452984883423, "grad_norm": 0.6781127452850342, "learning_rate": 9.802159971548386e-05, "loss": 3.5116, "step": 1652 }, { "epoch": 0.13560850627722265, "grad_norm": 0.5579132437705994, "learning_rate": 9.801404126663372e-05, "loss": 3.5021, "step": 1654 }, { "epoch": 0.13577248270561107, "grad_norm": 0.8519318103790283, "learning_rate": 9.800646869936758e-05, "loss": 3.4645, "step": 1656 }, { "epoch": 0.1359364591339995, "grad_norm": 0.8295395374298096, "learning_rate": 9.799888201591219e-05, "loss": 3.4875, "step": 1658 }, { "epoch": 0.1361004355623879, "grad_norm": 0.7860473990440369, "learning_rate": 9.799128121849835e-05, "loss": 3.5143, "step": 1660 }, { "epoch": 0.13626441199077632, "grad_norm": 0.676199197769165, "learning_rate": 9.798366630936107e-05, "loss": 3.4924, "step": 1662 }, { "epoch": 0.13642838841916474, "grad_norm": 0.7471193075180054, "learning_rate": 9.797603729073949e-05, "loss": 3.4606, "step": 1664 }, { "epoch": 0.13659236484755316, "grad_norm": 0.7911469340324402, "learning_rate": 9.796839416487693e-05, "loss": 3.487, "step": 1666 }, { "epoch": 0.13675634127594158, "grad_norm": 0.7229553461074829, "learning_rate": 9.796073693402081e-05, "loss": 3.5058, "step": 1668 }, { "epoch": 0.13692031770433, "grad_norm": 0.7046807408332825, "learning_rate": 9.795306560042272e-05, "loss": 3.4739, "step": 1670 }, { "epoch": 0.13708429413271842, "grad_norm": 0.7285602688789368, "learning_rate": 9.794538016633842e-05, "loss": 3.4592, "step": 1672 }, { "epoch": 0.13724827056110683, "grad_norm": 0.7747913002967834, "learning_rate": 9.793768063402777e-05, "loss": 3.4593, "step": 1674 }, { "epoch": 0.13741224698949525, "grad_norm": 0.7013533711433411, "learning_rate": 9.792996700575481e-05, "loss": 3.5569, "step": 1676 }, { "epoch": 0.13757622341788367, "grad_norm": 0.6556512117385864, "learning_rate": 9.792223928378772e-05, "loss": 3.4342, "step": 1678 }, { "epoch": 0.1377401998462721, "grad_norm": 0.7427647709846497, "learning_rate": 9.79144974703988e-05, "loss": 3.45, "step": 1680 }, { "epoch": 0.1379041762746605, "grad_norm": 0.7319619059562683, "learning_rate": 9.790674156786452e-05, "loss": 3.5378, "step": 1682 }, { "epoch": 0.13806815270304892, "grad_norm": 0.6408218741416931, "learning_rate": 9.78989715784655e-05, "loss": 3.5022, "step": 1684 }, { "epoch": 0.13823212913143734, "grad_norm": 0.5858979821205139, "learning_rate": 9.789118750448647e-05, "loss": 3.461, "step": 1686 }, { "epoch": 0.1383961055598258, "grad_norm": 0.6623833775520325, "learning_rate": 9.788338934821632e-05, "loss": 3.5288, "step": 1688 }, { "epoch": 0.1385600819882142, "grad_norm": 0.6391535997390747, "learning_rate": 9.787557711194808e-05, "loss": 3.5074, "step": 1690 }, { "epoch": 0.13872405841660262, "grad_norm": 0.5775202512741089, "learning_rate": 9.786775079797893e-05, "loss": 3.4467, "step": 1692 }, { "epoch": 0.13888803484499104, "grad_norm": 0.6308355927467346, "learning_rate": 9.785991040861017e-05, "loss": 3.4568, "step": 1694 }, { "epoch": 0.13905201127337946, "grad_norm": 0.7259300351142883, "learning_rate": 9.785205594614725e-05, "loss": 3.5018, "step": 1696 }, { "epoch": 0.13921598770176788, "grad_norm": 0.7123456001281738, "learning_rate": 9.784418741289975e-05, "loss": 3.4491, "step": 1698 }, { "epoch": 0.1393799641301563, "grad_norm": 0.7426223754882812, "learning_rate": 9.783630481118141e-05, "loss": 3.5045, "step": 1700 }, { "epoch": 0.13954394055854472, "grad_norm": 0.8505781292915344, "learning_rate": 9.782840814331007e-05, "loss": 3.5193, "step": 1702 }, { "epoch": 0.13970791698693313, "grad_norm": 0.7968536615371704, "learning_rate": 9.782049741160775e-05, "loss": 3.499, "step": 1704 }, { "epoch": 0.13987189341532155, "grad_norm": 0.7513880729675293, "learning_rate": 9.781257261840055e-05, "loss": 3.4711, "step": 1706 }, { "epoch": 0.14003586984370997, "grad_norm": 0.8544629812240601, "learning_rate": 9.780463376601878e-05, "loss": 3.4969, "step": 1708 }, { "epoch": 0.1401998462720984, "grad_norm": 0.5832239985466003, "learning_rate": 9.77966808567968e-05, "loss": 3.3526, "step": 1710 }, { "epoch": 0.1403638227004868, "grad_norm": 0.7072123885154724, "learning_rate": 9.778871389307318e-05, "loss": 3.4907, "step": 1712 }, { "epoch": 0.14052779912887522, "grad_norm": 0.8044034242630005, "learning_rate": 9.778073287719054e-05, "loss": 3.5346, "step": 1714 }, { "epoch": 0.14069177555726364, "grad_norm": 0.6524981260299683, "learning_rate": 9.777273781149574e-05, "loss": 3.429, "step": 1716 }, { "epoch": 0.14085575198565206, "grad_norm": 0.5953693389892578, "learning_rate": 9.776472869833965e-05, "loss": 3.4326, "step": 1718 }, { "epoch": 0.14101972841404048, "grad_norm": 0.586199164390564, "learning_rate": 9.775670554007736e-05, "loss": 3.382, "step": 1720 }, { "epoch": 0.1411837048424289, "grad_norm": 0.6465304493904114, "learning_rate": 9.774866833906808e-05, "loss": 3.4143, "step": 1722 }, { "epoch": 0.14134768127081732, "grad_norm": 0.8190158009529114, "learning_rate": 9.774061709767508e-05, "loss": 3.4286, "step": 1724 }, { "epoch": 0.14151165769920573, "grad_norm": 0.778614342212677, "learning_rate": 9.773255181826586e-05, "loss": 3.4458, "step": 1726 }, { "epoch": 0.14167563412759415, "grad_norm": 0.8797032237052917, "learning_rate": 9.772447250321197e-05, "loss": 3.4719, "step": 1728 }, { "epoch": 0.14183961055598257, "grad_norm": 0.6563115119934082, "learning_rate": 9.771637915488911e-05, "loss": 3.3792, "step": 1730 }, { "epoch": 0.142003586984371, "grad_norm": 0.823006272315979, "learning_rate": 9.770827177567712e-05, "loss": 3.4733, "step": 1732 }, { "epoch": 0.1421675634127594, "grad_norm": 0.7860798239707947, "learning_rate": 9.770015036795996e-05, "loss": 3.5151, "step": 1734 }, { "epoch": 0.14233153984114782, "grad_norm": 0.8051521182060242, "learning_rate": 9.76920149341257e-05, "loss": 3.4156, "step": 1736 }, { "epoch": 0.14249551626953624, "grad_norm": 0.6009500026702881, "learning_rate": 9.768386547656655e-05, "loss": 3.436, "step": 1738 }, { "epoch": 0.14265949269792466, "grad_norm": 0.68117356300354, "learning_rate": 9.767570199767883e-05, "loss": 3.4671, "step": 1740 }, { "epoch": 0.14282346912631308, "grad_norm": 0.6417118906974792, "learning_rate": 9.766752449986301e-05, "loss": 3.4416, "step": 1742 }, { "epoch": 0.14298744555470153, "grad_norm": 0.6248669624328613, "learning_rate": 9.765933298552366e-05, "loss": 3.3769, "step": 1744 }, { "epoch": 0.14315142198308994, "grad_norm": 0.5746626853942871, "learning_rate": 9.765112745706945e-05, "loss": 3.408, "step": 1746 }, { "epoch": 0.14331539841147836, "grad_norm": 0.6235172748565674, "learning_rate": 9.764290791691324e-05, "loss": 3.4454, "step": 1748 }, { "epoch": 0.14347937483986678, "grad_norm": 0.63954758644104, "learning_rate": 9.763467436747193e-05, "loss": 3.4275, "step": 1750 }, { "epoch": 0.1436433512682552, "grad_norm": 0.6917594075202942, "learning_rate": 9.76264268111666e-05, "loss": 3.3796, "step": 1752 }, { "epoch": 0.14380732769664362, "grad_norm": 0.6280871629714966, "learning_rate": 9.76181652504224e-05, "loss": 3.4056, "step": 1754 }, { "epoch": 0.14397130412503203, "grad_norm": 0.5994766354560852, "learning_rate": 9.760988968766864e-05, "loss": 3.458, "step": 1756 }, { "epoch": 0.14413528055342045, "grad_norm": 0.7142448425292969, "learning_rate": 9.760160012533872e-05, "loss": 3.4442, "step": 1758 }, { "epoch": 0.14429925698180887, "grad_norm": 0.632342517375946, "learning_rate": 9.759329656587017e-05, "loss": 3.4396, "step": 1760 }, { "epoch": 0.1444632334101973, "grad_norm": 0.6980354189872742, "learning_rate": 9.758497901170465e-05, "loss": 3.4684, "step": 1762 }, { "epoch": 0.1446272098385857, "grad_norm": 0.6333186626434326, "learning_rate": 9.75766474652879e-05, "loss": 3.4394, "step": 1764 }, { "epoch": 0.14479118626697413, "grad_norm": 0.5935460329055786, "learning_rate": 9.756830192906978e-05, "loss": 3.3757, "step": 1766 }, { "epoch": 0.14495516269536254, "grad_norm": 0.6060703992843628, "learning_rate": 9.75599424055043e-05, "loss": 3.3985, "step": 1768 }, { "epoch": 0.14511913912375096, "grad_norm": 0.7013797760009766, "learning_rate": 9.755156889704953e-05, "loss": 3.4613, "step": 1770 }, { "epoch": 0.14528311555213938, "grad_norm": 0.6497318744659424, "learning_rate": 9.75431814061677e-05, "loss": 3.3849, "step": 1772 }, { "epoch": 0.1454470919805278, "grad_norm": 0.6609060764312744, "learning_rate": 9.753477993532514e-05, "loss": 3.3863, "step": 1774 }, { "epoch": 0.14561106840891622, "grad_norm": 0.6392355561256409, "learning_rate": 9.752636448699227e-05, "loss": 3.4147, "step": 1776 }, { "epoch": 0.14577504483730463, "grad_norm": 0.729839563369751, "learning_rate": 9.751793506364362e-05, "loss": 3.3952, "step": 1778 }, { "epoch": 0.14593902126569305, "grad_norm": 0.6394525170326233, "learning_rate": 9.750949166775786e-05, "loss": 3.4272, "step": 1780 }, { "epoch": 0.14610299769408147, "grad_norm": 0.7037297487258911, "learning_rate": 9.750103430181776e-05, "loss": 3.3667, "step": 1782 }, { "epoch": 0.1462669741224699, "grad_norm": 0.6344433426856995, "learning_rate": 9.749256296831017e-05, "loss": 3.3558, "step": 1784 }, { "epoch": 0.1464309505508583, "grad_norm": 0.6455307006835938, "learning_rate": 9.748407766972607e-05, "loss": 3.3936, "step": 1786 }, { "epoch": 0.14659492697924673, "grad_norm": 0.7534605860710144, "learning_rate": 9.747557840856055e-05, "loss": 3.3973, "step": 1788 }, { "epoch": 0.14675890340763514, "grad_norm": 0.6741543412208557, "learning_rate": 9.746706518731278e-05, "loss": 3.4123, "step": 1790 }, { "epoch": 0.14692287983602356, "grad_norm": 0.7014438509941101, "learning_rate": 9.745853800848606e-05, "loss": 3.4118, "step": 1792 }, { "epoch": 0.14708685626441198, "grad_norm": 0.6591073870658875, "learning_rate": 9.74499968745878e-05, "loss": 3.4319, "step": 1794 }, { "epoch": 0.1472508326928004, "grad_norm": 0.6363744735717773, "learning_rate": 9.74414417881295e-05, "loss": 3.384, "step": 1796 }, { "epoch": 0.14741480912118882, "grad_norm": 0.6241241097450256, "learning_rate": 9.743287275162673e-05, "loss": 3.4458, "step": 1798 }, { "epoch": 0.14757878554957723, "grad_norm": 0.7173709869384766, "learning_rate": 9.742428976759925e-05, "loss": 3.4145, "step": 1800 }, { "epoch": 0.14774276197796568, "grad_norm": 0.6002538800239563, "learning_rate": 9.741569283857082e-05, "loss": 3.3948, "step": 1802 }, { "epoch": 0.1479067384063541, "grad_norm": 0.7210296988487244, "learning_rate": 9.740708196706936e-05, "loss": 3.3912, "step": 1804 }, { "epoch": 0.14807071483474252, "grad_norm": 0.5635441541671753, "learning_rate": 9.739845715562688e-05, "loss": 3.4489, "step": 1806 }, { "epoch": 0.14823469126313094, "grad_norm": 0.6474645733833313, "learning_rate": 9.738981840677948e-05, "loss": 3.429, "step": 1808 }, { "epoch": 0.14839866769151935, "grad_norm": 0.6213793754577637, "learning_rate": 9.738116572306737e-05, "loss": 3.4043, "step": 1810 }, { "epoch": 0.14856264411990777, "grad_norm": 0.5918754935264587, "learning_rate": 9.737249910703485e-05, "loss": 3.371, "step": 1812 }, { "epoch": 0.1487266205482962, "grad_norm": 0.5595375299453735, "learning_rate": 9.736381856123034e-05, "loss": 3.3467, "step": 1814 }, { "epoch": 0.1488905969766846, "grad_norm": 0.5739578604698181, "learning_rate": 9.735512408820628e-05, "loss": 3.4073, "step": 1816 }, { "epoch": 0.14905457340507303, "grad_norm": 0.5913086533546448, "learning_rate": 9.73464156905193e-05, "loss": 3.37, "step": 1818 }, { "epoch": 0.14921854983346144, "grad_norm": 0.5342605710029602, "learning_rate": 9.733769337073009e-05, "loss": 3.3615, "step": 1820 }, { "epoch": 0.14938252626184986, "grad_norm": 0.6710630655288696, "learning_rate": 9.73289571314034e-05, "loss": 3.3903, "step": 1822 }, { "epoch": 0.14954650269023828, "grad_norm": 0.6280022263526917, "learning_rate": 9.732020697510811e-05, "loss": 3.366, "step": 1824 }, { "epoch": 0.1497104791186267, "grad_norm": 0.6352916955947876, "learning_rate": 9.731144290441718e-05, "loss": 3.4006, "step": 1826 }, { "epoch": 0.14987445554701512, "grad_norm": 0.6925874948501587, "learning_rate": 9.730266492190769e-05, "loss": 3.4513, "step": 1828 }, { "epoch": 0.15003843197540354, "grad_norm": 0.7678630948066711, "learning_rate": 9.729387303016076e-05, "loss": 3.3515, "step": 1830 }, { "epoch": 0.15020240840379195, "grad_norm": 0.7654356956481934, "learning_rate": 9.728506723176162e-05, "loss": 3.357, "step": 1832 }, { "epoch": 0.15036638483218037, "grad_norm": 0.6860572695732117, "learning_rate": 9.727624752929962e-05, "loss": 3.4024, "step": 1834 }, { "epoch": 0.1505303612605688, "grad_norm": 0.6397068500518799, "learning_rate": 9.726741392536815e-05, "loss": 3.4281, "step": 1836 }, { "epoch": 0.1506943376889572, "grad_norm": 0.6445949673652649, "learning_rate": 9.725856642256472e-05, "loss": 3.3339, "step": 1838 }, { "epoch": 0.15085831411734563, "grad_norm": 0.6427408456802368, "learning_rate": 9.724970502349091e-05, "loss": 3.3984, "step": 1840 }, { "epoch": 0.15102229054573404, "grad_norm": 0.6301809549331665, "learning_rate": 9.72408297307524e-05, "loss": 3.3884, "step": 1842 }, { "epoch": 0.15118626697412246, "grad_norm": 0.561808705329895, "learning_rate": 9.723194054695894e-05, "loss": 3.39, "step": 1844 }, { "epoch": 0.15135024340251088, "grad_norm": 0.5919866561889648, "learning_rate": 9.722303747472441e-05, "loss": 3.3677, "step": 1846 }, { "epoch": 0.1515142198308993, "grad_norm": 0.6336367726325989, "learning_rate": 9.721412051666668e-05, "loss": 3.3986, "step": 1848 }, { "epoch": 0.15167819625928772, "grad_norm": 0.687470018863678, "learning_rate": 9.720518967540781e-05, "loss": 3.3543, "step": 1850 }, { "epoch": 0.15184217268767614, "grad_norm": 0.7600200176239014, "learning_rate": 9.719624495357387e-05, "loss": 3.4157, "step": 1852 }, { "epoch": 0.15200614911606455, "grad_norm": 0.6732688546180725, "learning_rate": 9.718728635379502e-05, "loss": 3.4003, "step": 1854 }, { "epoch": 0.15217012554445297, "grad_norm": 0.7202364206314087, "learning_rate": 9.717831387870555e-05, "loss": 3.3777, "step": 1856 }, { "epoch": 0.1523341019728414, "grad_norm": 0.6364483833312988, "learning_rate": 9.716932753094376e-05, "loss": 3.3864, "step": 1858 }, { "epoch": 0.15249807840122984, "grad_norm": 0.5882256627082825, "learning_rate": 9.71603273131521e-05, "loss": 3.3319, "step": 1860 }, { "epoch": 0.15266205482961825, "grad_norm": 0.716076672077179, "learning_rate": 9.715131322797704e-05, "loss": 3.4332, "step": 1862 }, { "epoch": 0.15282603125800667, "grad_norm": 0.6526336073875427, "learning_rate": 9.714228527806915e-05, "loss": 3.3591, "step": 1864 }, { "epoch": 0.1529900076863951, "grad_norm": 0.588830292224884, "learning_rate": 9.71332434660831e-05, "loss": 3.4014, "step": 1866 }, { "epoch": 0.1531539841147835, "grad_norm": 0.5935143232345581, "learning_rate": 9.712418779467758e-05, "loss": 3.3398, "step": 1868 }, { "epoch": 0.15331796054317193, "grad_norm": 0.6331619620323181, "learning_rate": 9.71151182665154e-05, "loss": 3.3374, "step": 1870 }, { "epoch": 0.15348193697156035, "grad_norm": 0.5878372192382812, "learning_rate": 9.710603488426345e-05, "loss": 3.3286, "step": 1872 }, { "epoch": 0.15364591339994876, "grad_norm": 0.5949060320854187, "learning_rate": 9.709693765059266e-05, "loss": 3.3715, "step": 1874 }, { "epoch": 0.15380988982833718, "grad_norm": 0.665457546710968, "learning_rate": 9.708782656817807e-05, "loss": 3.355, "step": 1876 }, { "epoch": 0.1539738662567256, "grad_norm": 0.6434701681137085, "learning_rate": 9.707870163969874e-05, "loss": 3.3317, "step": 1878 }, { "epoch": 0.15413784268511402, "grad_norm": 0.6445391774177551, "learning_rate": 9.706956286783786e-05, "loss": 3.3943, "step": 1880 }, { "epoch": 0.15430181911350244, "grad_norm": 0.6335451602935791, "learning_rate": 9.706041025528266e-05, "loss": 3.3645, "step": 1882 }, { "epoch": 0.15446579554189085, "grad_norm": 0.6084844470024109, "learning_rate": 9.705124380472443e-05, "loss": 3.3914, "step": 1884 }, { "epoch": 0.15462977197027927, "grad_norm": 0.6261113286018372, "learning_rate": 9.704206351885857e-05, "loss": 3.3381, "step": 1886 }, { "epoch": 0.1547937483986677, "grad_norm": 0.6548987030982971, "learning_rate": 9.703286940038449e-05, "loss": 3.3277, "step": 1888 }, { "epoch": 0.1549577248270561, "grad_norm": 0.6208562254905701, "learning_rate": 9.702366145200573e-05, "loss": 3.3789, "step": 1890 }, { "epoch": 0.15512170125544453, "grad_norm": 0.6488550901412964, "learning_rate": 9.701443967642984e-05, "loss": 3.3861, "step": 1892 }, { "epoch": 0.15528567768383295, "grad_norm": 0.6071347594261169, "learning_rate": 9.700520407636849e-05, "loss": 3.4027, "step": 1894 }, { "epoch": 0.15544965411222136, "grad_norm": 0.6656597852706909, "learning_rate": 9.699595465453734e-05, "loss": 3.3003, "step": 1896 }, { "epoch": 0.15561363054060978, "grad_norm": 0.6349019408226013, "learning_rate": 9.69866914136562e-05, "loss": 3.398, "step": 1898 }, { "epoch": 0.1557776069689982, "grad_norm": 0.6088286638259888, "learning_rate": 9.69774143564489e-05, "loss": 3.3114, "step": 1900 }, { "epoch": 0.15594158339738662, "grad_norm": 0.8147541284561157, "learning_rate": 9.696812348564331e-05, "loss": 3.3856, "step": 1902 }, { "epoch": 0.15610555982577504, "grad_norm": 0.6513493657112122, "learning_rate": 9.695881880397143e-05, "loss": 3.4239, "step": 1904 }, { "epoch": 0.15626953625416345, "grad_norm": 0.6540910601615906, "learning_rate": 9.694950031416925e-05, "loss": 3.3575, "step": 1906 }, { "epoch": 0.15643351268255187, "grad_norm": 0.6017822027206421, "learning_rate": 9.694016801897685e-05, "loss": 3.3468, "step": 1908 }, { "epoch": 0.1565974891109403, "grad_norm": 0.7597635984420776, "learning_rate": 9.693082192113839e-05, "loss": 3.3359, "step": 1910 }, { "epoch": 0.1567614655393287, "grad_norm": 0.8275761008262634, "learning_rate": 9.692146202340206e-05, "loss": 3.3465, "step": 1912 }, { "epoch": 0.15692544196771713, "grad_norm": 0.8236324787139893, "learning_rate": 9.69120883285201e-05, "loss": 3.3518, "step": 1914 }, { "epoch": 0.15708941839610557, "grad_norm": 0.7957652807235718, "learning_rate": 9.690270083924883e-05, "loss": 3.3424, "step": 1916 }, { "epoch": 0.157253394824494, "grad_norm": 0.7953089475631714, "learning_rate": 9.689329955834865e-05, "loss": 3.353, "step": 1918 }, { "epoch": 0.1574173712528824, "grad_norm": 0.7492114305496216, "learning_rate": 9.688388448858394e-05, "loss": 3.3389, "step": 1920 }, { "epoch": 0.15758134768127083, "grad_norm": 0.612477719783783, "learning_rate": 9.68744556327232e-05, "loss": 3.4137, "step": 1922 }, { "epoch": 0.15774532410965925, "grad_norm": 0.6381865739822388, "learning_rate": 9.686501299353895e-05, "loss": 3.3281, "step": 1924 }, { "epoch": 0.15790930053804766, "grad_norm": 0.6546152234077454, "learning_rate": 9.68555565738078e-05, "loss": 3.3501, "step": 1926 }, { "epoch": 0.15807327696643608, "grad_norm": 0.6780794262886047, "learning_rate": 9.684608637631036e-05, "loss": 3.3393, "step": 1928 }, { "epoch": 0.1582372533948245, "grad_norm": 0.637367308139801, "learning_rate": 9.683660240383135e-05, "loss": 3.3353, "step": 1930 }, { "epoch": 0.15840122982321292, "grad_norm": 0.647280216217041, "learning_rate": 9.68271046591595e-05, "loss": 3.4004, "step": 1932 }, { "epoch": 0.15856520625160134, "grad_norm": 0.6508013010025024, "learning_rate": 9.681759314508758e-05, "loss": 3.3203, "step": 1934 }, { "epoch": 0.15872918267998976, "grad_norm": 0.6182466745376587, "learning_rate": 9.680806786441244e-05, "loss": 3.3234, "step": 1936 }, { "epoch": 0.15889315910837817, "grad_norm": 0.6148533821105957, "learning_rate": 9.679852881993496e-05, "loss": 3.4097, "step": 1938 }, { "epoch": 0.1590571355367666, "grad_norm": 0.7295002937316895, "learning_rate": 9.678897601446008e-05, "loss": 3.3832, "step": 1940 }, { "epoch": 0.159221111965155, "grad_norm": 0.611003041267395, "learning_rate": 9.67794094507968e-05, "loss": 3.3381, "step": 1942 }, { "epoch": 0.15938508839354343, "grad_norm": 0.6375026106834412, "learning_rate": 9.676982913175813e-05, "loss": 3.3224, "step": 1944 }, { "epoch": 0.15954906482193185, "grad_norm": 0.5718812942504883, "learning_rate": 9.676023506016112e-05, "loss": 3.3178, "step": 1946 }, { "epoch": 0.15971304125032026, "grad_norm": 0.5863606333732605, "learning_rate": 9.675062723882691e-05, "loss": 3.3181, "step": 1948 }, { "epoch": 0.15987701767870868, "grad_norm": 0.6420906782150269, "learning_rate": 9.674100567058064e-05, "loss": 3.3457, "step": 1950 }, { "epoch": 0.1600409941070971, "grad_norm": 0.6146227121353149, "learning_rate": 9.673137035825153e-05, "loss": 3.3324, "step": 1952 }, { "epoch": 0.16020497053548552, "grad_norm": 0.6017966866493225, "learning_rate": 9.672172130467281e-05, "loss": 3.3241, "step": 1954 }, { "epoch": 0.16036894696387394, "grad_norm": 0.6874404549598694, "learning_rate": 9.671205851268175e-05, "loss": 3.3436, "step": 1956 }, { "epoch": 0.16053292339226236, "grad_norm": 0.6882346868515015, "learning_rate": 9.670238198511969e-05, "loss": 3.3263, "step": 1958 }, { "epoch": 0.16069689982065077, "grad_norm": 0.7165938019752502, "learning_rate": 9.669269172483197e-05, "loss": 3.2836, "step": 1960 }, { "epoch": 0.1608608762490392, "grad_norm": 0.7419902086257935, "learning_rate": 9.668298773466802e-05, "loss": 3.3599, "step": 1962 }, { "epoch": 0.1610248526774276, "grad_norm": 0.7155027985572815, "learning_rate": 9.667327001748125e-05, "loss": 3.328, "step": 1964 }, { "epoch": 0.16118882910581603, "grad_norm": 0.5572860836982727, "learning_rate": 9.666353857612913e-05, "loss": 3.2618, "step": 1966 }, { "epoch": 0.16135280553420445, "grad_norm": 0.6426743865013123, "learning_rate": 9.665379341347318e-05, "loss": 3.3289, "step": 1968 }, { "epoch": 0.16151678196259286, "grad_norm": 0.7807396054267883, "learning_rate": 9.664403453237894e-05, "loss": 3.3503, "step": 1970 }, { "epoch": 0.16168075839098128, "grad_norm": 0.6256475448608398, "learning_rate": 9.663426193571598e-05, "loss": 3.368, "step": 1972 }, { "epoch": 0.16184473481936973, "grad_norm": 0.6786140203475952, "learning_rate": 9.662447562635791e-05, "loss": 3.3482, "step": 1974 }, { "epoch": 0.16200871124775815, "grad_norm": 0.6162734627723694, "learning_rate": 9.661467560718237e-05, "loss": 3.3226, "step": 1976 }, { "epoch": 0.16217268767614657, "grad_norm": 0.609999418258667, "learning_rate": 9.660486188107104e-05, "loss": 3.3118, "step": 1978 }, { "epoch": 0.16233666410453498, "grad_norm": 0.7471441626548767, "learning_rate": 9.659503445090963e-05, "loss": 3.3642, "step": 1980 }, { "epoch": 0.1625006405329234, "grad_norm": 0.6361717581748962, "learning_rate": 9.658519331958785e-05, "loss": 3.2547, "step": 1982 }, { "epoch": 0.16266461696131182, "grad_norm": 0.5587472915649414, "learning_rate": 9.657533848999947e-05, "loss": 3.3375, "step": 1984 }, { "epoch": 0.16282859338970024, "grad_norm": 0.6860288381576538, "learning_rate": 9.65654699650423e-05, "loss": 3.2796, "step": 1986 }, { "epoch": 0.16299256981808866, "grad_norm": 0.6986459493637085, "learning_rate": 9.655558774761813e-05, "loss": 3.3553, "step": 1988 }, { "epoch": 0.16315654624647707, "grad_norm": 0.7562621235847473, "learning_rate": 9.654569184063282e-05, "loss": 3.3779, "step": 1990 }, { "epoch": 0.1633205226748655, "grad_norm": 0.7100428938865662, "learning_rate": 9.653578224699622e-05, "loss": 3.3995, "step": 1992 }, { "epoch": 0.1634844991032539, "grad_norm": 0.776755690574646, "learning_rate": 9.652585896962223e-05, "loss": 3.3391, "step": 1994 }, { "epoch": 0.16364847553164233, "grad_norm": 0.6308813095092773, "learning_rate": 9.651592201142879e-05, "loss": 3.2949, "step": 1996 }, { "epoch": 0.16381245196003075, "grad_norm": 0.7114334106445312, "learning_rate": 9.650597137533782e-05, "loss": 3.3665, "step": 1998 }, { "epoch": 0.16397642838841917, "grad_norm": 0.6608272194862366, "learning_rate": 9.649600706427525e-05, "loss": 3.2825, "step": 2000 }, { "epoch": 0.16414040481680758, "grad_norm": 0.6543484926223755, "learning_rate": 9.648602908117112e-05, "loss": 3.3447, "step": 2002 }, { "epoch": 0.164304381245196, "grad_norm": 0.6831576228141785, "learning_rate": 9.647603742895939e-05, "loss": 3.3979, "step": 2004 }, { "epoch": 0.16446835767358442, "grad_norm": 0.717369019985199, "learning_rate": 9.646603211057809e-05, "loss": 3.3508, "step": 2006 }, { "epoch": 0.16463233410197284, "grad_norm": 0.6552402973175049, "learning_rate": 9.645601312896929e-05, "loss": 3.326, "step": 2008 }, { "epoch": 0.16479631053036126, "grad_norm": 0.7372413277626038, "learning_rate": 9.644598048707901e-05, "loss": 3.3048, "step": 2010 }, { "epoch": 0.16496028695874967, "grad_norm": 0.5617173910140991, "learning_rate": 9.643593418785734e-05, "loss": 3.3341, "step": 2012 }, { "epoch": 0.1651242633871381, "grad_norm": 0.6190782785415649, "learning_rate": 9.642587423425839e-05, "loss": 3.3441, "step": 2014 }, { "epoch": 0.1652882398155265, "grad_norm": 0.6181708574295044, "learning_rate": 9.641580062924022e-05, "loss": 3.3729, "step": 2016 }, { "epoch": 0.16545221624391493, "grad_norm": 0.5956866145133972, "learning_rate": 9.640571337576499e-05, "loss": 3.3385, "step": 2018 }, { "epoch": 0.16561619267230335, "grad_norm": 0.7407371401786804, "learning_rate": 9.639561247679883e-05, "loss": 3.2941, "step": 2020 }, { "epoch": 0.16578016910069177, "grad_norm": 0.6292521953582764, "learning_rate": 9.638549793531186e-05, "loss": 3.3027, "step": 2022 }, { "epoch": 0.16594414552908018, "grad_norm": 0.6599383354187012, "learning_rate": 9.637536975427826e-05, "loss": 3.2438, "step": 2024 }, { "epoch": 0.1661081219574686, "grad_norm": 0.6305571794509888, "learning_rate": 9.636522793667617e-05, "loss": 3.3542, "step": 2026 }, { "epoch": 0.16627209838585702, "grad_norm": 0.6023452877998352, "learning_rate": 9.635507248548781e-05, "loss": 3.2435, "step": 2028 }, { "epoch": 0.16643607481424547, "grad_norm": 0.60383540391922, "learning_rate": 9.634490340369933e-05, "loss": 3.3507, "step": 2030 }, { "epoch": 0.16660005124263388, "grad_norm": 0.6280787587165833, "learning_rate": 9.633472069430094e-05, "loss": 3.343, "step": 2032 }, { "epoch": 0.1667640276710223, "grad_norm": 0.6468386650085449, "learning_rate": 9.632452436028685e-05, "loss": 3.2778, "step": 2034 }, { "epoch": 0.16692800409941072, "grad_norm": 0.6163133978843689, "learning_rate": 9.631431440465526e-05, "loss": 3.2935, "step": 2036 }, { "epoch": 0.16709198052779914, "grad_norm": 0.6122549772262573, "learning_rate": 9.630409083040837e-05, "loss": 3.3503, "step": 2038 }, { "epoch": 0.16725595695618756, "grad_norm": 0.7354429364204407, "learning_rate": 9.629385364055242e-05, "loss": 3.3263, "step": 2040 }, { "epoch": 0.16741993338457598, "grad_norm": 0.5872082710266113, "learning_rate": 9.628360283809761e-05, "loss": 3.2704, "step": 2042 }, { "epoch": 0.1675839098129644, "grad_norm": 0.6023679375648499, "learning_rate": 9.627333842605819e-05, "loss": 3.2958, "step": 2044 }, { "epoch": 0.1677478862413528, "grad_norm": 0.5730355978012085, "learning_rate": 9.626306040745237e-05, "loss": 3.3174, "step": 2046 }, { "epoch": 0.16791186266974123, "grad_norm": 0.5722047090530396, "learning_rate": 9.625276878530237e-05, "loss": 3.3153, "step": 2048 }, { "epoch": 0.16807583909812965, "grad_norm": 0.6651049852371216, "learning_rate": 9.624246356263444e-05, "loss": 3.3299, "step": 2050 }, { "epoch": 0.16823981552651807, "grad_norm": 0.6372424364089966, "learning_rate": 9.623214474247878e-05, "loss": 3.3651, "step": 2052 }, { "epoch": 0.16840379195490648, "grad_norm": 0.5711623430252075, "learning_rate": 9.622181232786963e-05, "loss": 3.3216, "step": 2054 }, { "epoch": 0.1685677683832949, "grad_norm": 0.6460525393486023, "learning_rate": 9.621146632184521e-05, "loss": 3.3674, "step": 2056 }, { "epoch": 0.16873174481168332, "grad_norm": 0.6044295430183411, "learning_rate": 9.620110672744776e-05, "loss": 3.3083, "step": 2058 }, { "epoch": 0.16889572124007174, "grad_norm": 0.5659945011138916, "learning_rate": 9.619073354772344e-05, "loss": 3.3649, "step": 2060 }, { "epoch": 0.16905969766846016, "grad_norm": 0.5560106039047241, "learning_rate": 9.618034678572252e-05, "loss": 3.2848, "step": 2062 }, { "epoch": 0.16922367409684858, "grad_norm": 0.5644478797912598, "learning_rate": 9.616994644449915e-05, "loss": 3.3233, "step": 2064 }, { "epoch": 0.169387650525237, "grad_norm": 0.6400248408317566, "learning_rate": 9.615953252711157e-05, "loss": 3.3204, "step": 2066 }, { "epoch": 0.1695516269536254, "grad_norm": 0.5804336667060852, "learning_rate": 9.614910503662196e-05, "loss": 3.3332, "step": 2068 }, { "epoch": 0.16971560338201383, "grad_norm": 0.6843202710151672, "learning_rate": 9.613866397609646e-05, "loss": 3.3108, "step": 2070 }, { "epoch": 0.16987957981040225, "grad_norm": 0.6259203553199768, "learning_rate": 9.612820934860529e-05, "loss": 3.2955, "step": 2072 }, { "epoch": 0.17004355623879067, "grad_norm": 0.7539075016975403, "learning_rate": 9.611774115722258e-05, "loss": 3.3332, "step": 2074 }, { "epoch": 0.17020753266717908, "grad_norm": 0.6109238266944885, "learning_rate": 9.610725940502648e-05, "loss": 3.2937, "step": 2076 }, { "epoch": 0.1703715090955675, "grad_norm": 0.6348362565040588, "learning_rate": 9.609676409509912e-05, "loss": 3.2358, "step": 2078 }, { "epoch": 0.17053548552395592, "grad_norm": 0.7391447424888611, "learning_rate": 9.608625523052663e-05, "loss": 3.3637, "step": 2080 }, { "epoch": 0.17069946195234434, "grad_norm": 0.6116240620613098, "learning_rate": 9.607573281439913e-05, "loss": 3.299, "step": 2082 }, { "epoch": 0.17086343838073276, "grad_norm": 0.6683641672134399, "learning_rate": 9.60651968498107e-05, "loss": 3.3175, "step": 2084 }, { "epoch": 0.17102741480912118, "grad_norm": 0.5680612921714783, "learning_rate": 9.605464733985941e-05, "loss": 3.2804, "step": 2086 }, { "epoch": 0.17119139123750962, "grad_norm": 0.5645765066146851, "learning_rate": 9.60440842876473e-05, "loss": 3.307, "step": 2088 }, { "epoch": 0.17135536766589804, "grad_norm": 0.6338248252868652, "learning_rate": 9.603350769628045e-05, "loss": 3.2567, "step": 2090 }, { "epoch": 0.17151934409428646, "grad_norm": 0.6122543811798096, "learning_rate": 9.602291756886888e-05, "loss": 3.3027, "step": 2092 }, { "epoch": 0.17168332052267488, "grad_norm": 0.6619787216186523, "learning_rate": 9.601231390852656e-05, "loss": 3.2741, "step": 2094 }, { "epoch": 0.1718472969510633, "grad_norm": 0.5675482153892517, "learning_rate": 9.600169671837149e-05, "loss": 3.2811, "step": 2096 }, { "epoch": 0.1720112733794517, "grad_norm": 0.6895171999931335, "learning_rate": 9.599106600152563e-05, "loss": 3.3162, "step": 2098 }, { "epoch": 0.17217524980784013, "grad_norm": 0.7057105898857117, "learning_rate": 9.59804217611149e-05, "loss": 3.2611, "step": 2100 }, { "epoch": 0.17233922623622855, "grad_norm": 0.5840970277786255, "learning_rate": 9.596976400026925e-05, "loss": 3.2843, "step": 2102 }, { "epoch": 0.17250320266461697, "grad_norm": 0.6528168320655823, "learning_rate": 9.595909272212254e-05, "loss": 3.3212, "step": 2104 }, { "epoch": 0.17266717909300539, "grad_norm": 0.5484073162078857, "learning_rate": 9.594840792981265e-05, "loss": 3.2849, "step": 2106 }, { "epoch": 0.1728311555213938, "grad_norm": 0.5874817967414856, "learning_rate": 9.59377096264814e-05, "loss": 3.3081, "step": 2108 }, { "epoch": 0.17299513194978222, "grad_norm": 0.5669682025909424, "learning_rate": 9.592699781527461e-05, "loss": 3.3498, "step": 2110 }, { "epoch": 0.17315910837817064, "grad_norm": 0.6883938312530518, "learning_rate": 9.591627249934207e-05, "loss": 3.2798, "step": 2112 }, { "epoch": 0.17332308480655906, "grad_norm": 0.6597236394882202, "learning_rate": 9.590553368183753e-05, "loss": 3.2804, "step": 2114 }, { "epoch": 0.17348706123494748, "grad_norm": 0.5905894637107849, "learning_rate": 9.589478136591872e-05, "loss": 3.2825, "step": 2116 }, { "epoch": 0.1736510376633359, "grad_norm": 0.637214183807373, "learning_rate": 9.588401555474732e-05, "loss": 3.3519, "step": 2118 }, { "epoch": 0.1738150140917243, "grad_norm": 0.5943377017974854, "learning_rate": 9.587323625148899e-05, "loss": 3.2875, "step": 2120 }, { "epoch": 0.17397899052011273, "grad_norm": 0.49435749650001526, "learning_rate": 9.586244345931336e-05, "loss": 3.3332, "step": 2122 }, { "epoch": 0.17414296694850115, "grad_norm": 0.6532015800476074, "learning_rate": 9.585163718139405e-05, "loss": 3.3226, "step": 2124 }, { "epoch": 0.17430694337688957, "grad_norm": 0.6478725075721741, "learning_rate": 9.584081742090861e-05, "loss": 3.2902, "step": 2126 }, { "epoch": 0.17447091980527799, "grad_norm": 0.6059595942497253, "learning_rate": 9.582998418103854e-05, "loss": 3.3047, "step": 2128 }, { "epoch": 0.1746348962336664, "grad_norm": 0.6445087790489197, "learning_rate": 9.581913746496934e-05, "loss": 3.3007, "step": 2130 }, { "epoch": 0.17479887266205482, "grad_norm": 0.5795426964759827, "learning_rate": 9.580827727589048e-05, "loss": 3.3193, "step": 2132 }, { "epoch": 0.17496284909044324, "grad_norm": 0.6961095929145813, "learning_rate": 9.579740361699535e-05, "loss": 3.328, "step": 2134 }, { "epoch": 0.17512682551883166, "grad_norm": 0.7018781900405884, "learning_rate": 9.578651649148133e-05, "loss": 3.2589, "step": 2136 }, { "epoch": 0.17529080194722008, "grad_norm": 0.6638593077659607, "learning_rate": 9.577561590254977e-05, "loss": 3.3047, "step": 2138 }, { "epoch": 0.1754547783756085, "grad_norm": 0.8037712574005127, "learning_rate": 9.576470185340596e-05, "loss": 3.3166, "step": 2140 }, { "epoch": 0.1756187548039969, "grad_norm": 0.630042314529419, "learning_rate": 9.575377434725911e-05, "loss": 3.279, "step": 2142 }, { "epoch": 0.17578273123238533, "grad_norm": 0.5768330097198486, "learning_rate": 9.57428333873225e-05, "loss": 3.2599, "step": 2144 }, { "epoch": 0.17594670766077378, "grad_norm": 0.5603185296058655, "learning_rate": 9.573187897681322e-05, "loss": 3.2339, "step": 2146 }, { "epoch": 0.1761106840891622, "grad_norm": 0.7578685879707336, "learning_rate": 9.572091111895243e-05, "loss": 3.3329, "step": 2148 }, { "epoch": 0.1762746605175506, "grad_norm": 0.5876714587211609, "learning_rate": 9.57099298169652e-05, "loss": 3.2627, "step": 2150 }, { "epoch": 0.17643863694593903, "grad_norm": 0.6411724090576172, "learning_rate": 9.569893507408055e-05, "loss": 3.2554, "step": 2152 }, { "epoch": 0.17660261337432745, "grad_norm": 0.6768316626548767, "learning_rate": 9.568792689353143e-05, "loss": 3.2805, "step": 2154 }, { "epoch": 0.17676658980271587, "grad_norm": 0.7020912766456604, "learning_rate": 9.567690527855483e-05, "loss": 3.2498, "step": 2156 }, { "epoch": 0.17693056623110429, "grad_norm": 0.630198061466217, "learning_rate": 9.566587023239157e-05, "loss": 3.3052, "step": 2158 }, { "epoch": 0.1770945426594927, "grad_norm": 0.7058777213096619, "learning_rate": 9.565482175828653e-05, "loss": 3.2939, "step": 2160 }, { "epoch": 0.17725851908788112, "grad_norm": 0.684704601764679, "learning_rate": 9.564375985948846e-05, "loss": 3.2628, "step": 2162 }, { "epoch": 0.17742249551626954, "grad_norm": 0.6196277737617493, "learning_rate": 9.56326845392501e-05, "loss": 3.2804, "step": 2164 }, { "epoch": 0.17758647194465796, "grad_norm": 0.6597474217414856, "learning_rate": 9.562159580082808e-05, "loss": 3.357, "step": 2166 }, { "epoch": 0.17775044837304638, "grad_norm": 0.6917456388473511, "learning_rate": 9.561049364748307e-05, "loss": 3.2818, "step": 2168 }, { "epoch": 0.1779144248014348, "grad_norm": 0.6305201649665833, "learning_rate": 9.559937808247961e-05, "loss": 3.2252, "step": 2170 }, { "epoch": 0.1780784012298232, "grad_norm": 0.6192725896835327, "learning_rate": 9.55882491090862e-05, "loss": 3.3079, "step": 2172 }, { "epoch": 0.17824237765821163, "grad_norm": 0.6527867913246155, "learning_rate": 9.55771067305753e-05, "loss": 3.2065, "step": 2174 }, { "epoch": 0.17840635408660005, "grad_norm": 0.6737766265869141, "learning_rate": 9.556595095022331e-05, "loss": 3.2789, "step": 2176 }, { "epoch": 0.17857033051498847, "grad_norm": 0.5953449010848999, "learning_rate": 9.555478177131052e-05, "loss": 3.2632, "step": 2178 }, { "epoch": 0.17873430694337689, "grad_norm": 0.6226819157600403, "learning_rate": 9.554359919712124e-05, "loss": 3.2756, "step": 2180 }, { "epoch": 0.1788982833717653, "grad_norm": 0.6152170896530151, "learning_rate": 9.553240323094368e-05, "loss": 3.2677, "step": 2182 }, { "epoch": 0.17906225980015372, "grad_norm": 0.5997733473777771, "learning_rate": 9.552119387606997e-05, "loss": 3.2369, "step": 2184 }, { "epoch": 0.17922623622854214, "grad_norm": 0.5696983933448792, "learning_rate": 9.550997113579618e-05, "loss": 3.3002, "step": 2186 }, { "epoch": 0.17939021265693056, "grad_norm": 0.5793240666389465, "learning_rate": 9.549873501342237e-05, "loss": 3.224, "step": 2188 }, { "epoch": 0.17955418908531898, "grad_norm": 0.5453920364379883, "learning_rate": 9.548748551225246e-05, "loss": 3.3058, "step": 2190 }, { "epoch": 0.1797181655137074, "grad_norm": 0.558608889579773, "learning_rate": 9.547622263559437e-05, "loss": 3.3077, "step": 2192 }, { "epoch": 0.1798821419420958, "grad_norm": 0.577103853225708, "learning_rate": 9.546494638675989e-05, "loss": 3.2894, "step": 2194 }, { "epoch": 0.18004611837048423, "grad_norm": 0.5528544187545776, "learning_rate": 9.54536567690648e-05, "loss": 3.236, "step": 2196 }, { "epoch": 0.18021009479887265, "grad_norm": 0.5541223883628845, "learning_rate": 9.544235378582877e-05, "loss": 3.268, "step": 2198 }, { "epoch": 0.18037407122726107, "grad_norm": 0.5316542983055115, "learning_rate": 9.543103744037544e-05, "loss": 3.2532, "step": 2200 }, { "epoch": 0.18053804765564951, "grad_norm": 0.6354775428771973, "learning_rate": 9.541970773603233e-05, "loss": 3.2782, "step": 2202 }, { "epoch": 0.18070202408403793, "grad_norm": 0.5799155235290527, "learning_rate": 9.540836467613092e-05, "loss": 3.224, "step": 2204 }, { "epoch": 0.18086600051242635, "grad_norm": 0.6716517210006714, "learning_rate": 9.53970082640066e-05, "loss": 3.2579, "step": 2206 }, { "epoch": 0.18102997694081477, "grad_norm": 0.6734136343002319, "learning_rate": 9.53856385029987e-05, "loss": 3.2698, "step": 2208 }, { "epoch": 0.1811939533692032, "grad_norm": 0.7301097512245178, "learning_rate": 9.53742553964505e-05, "loss": 3.2738, "step": 2210 }, { "epoch": 0.1813579297975916, "grad_norm": 0.6413136720657349, "learning_rate": 9.536285894770914e-05, "loss": 3.2058, "step": 2212 }, { "epoch": 0.18152190622598002, "grad_norm": 0.6198046803474426, "learning_rate": 9.535144916012575e-05, "loss": 3.2869, "step": 2214 }, { "epoch": 0.18168588265436844, "grad_norm": 0.6237669587135315, "learning_rate": 9.534002603705532e-05, "loss": 3.223, "step": 2216 }, { "epoch": 0.18184985908275686, "grad_norm": 0.626846969127655, "learning_rate": 9.53285895818568e-05, "loss": 3.1932, "step": 2218 }, { "epoch": 0.18201383551114528, "grad_norm": 0.5577837824821472, "learning_rate": 9.531713979789308e-05, "loss": 3.2983, "step": 2220 }, { "epoch": 0.1821778119395337, "grad_norm": 0.5938622951507568, "learning_rate": 9.53056766885309e-05, "loss": 3.2237, "step": 2222 }, { "epoch": 0.18234178836792211, "grad_norm": 0.5766521096229553, "learning_rate": 9.529420025714099e-05, "loss": 3.3077, "step": 2224 }, { "epoch": 0.18250576479631053, "grad_norm": 0.5775324702262878, "learning_rate": 9.528271050709797e-05, "loss": 3.298, "step": 2226 }, { "epoch": 0.18266974122469895, "grad_norm": 0.575670599937439, "learning_rate": 9.527120744178034e-05, "loss": 3.2163, "step": 2228 }, { "epoch": 0.18283371765308737, "grad_norm": 0.5995689630508423, "learning_rate": 9.525969106457059e-05, "loss": 3.2937, "step": 2230 }, { "epoch": 0.1829976940814758, "grad_norm": 0.6333314180374146, "learning_rate": 9.524816137885506e-05, "loss": 3.2528, "step": 2232 }, { "epoch": 0.1831616705098642, "grad_norm": 0.6713608503341675, "learning_rate": 9.523661838802404e-05, "loss": 3.2981, "step": 2234 }, { "epoch": 0.18332564693825262, "grad_norm": 0.5735260844230652, "learning_rate": 9.52250620954717e-05, "loss": 3.2495, "step": 2236 }, { "epoch": 0.18348962336664104, "grad_norm": 0.5098928809165955, "learning_rate": 9.521349250459617e-05, "loss": 3.2896, "step": 2238 }, { "epoch": 0.18365359979502946, "grad_norm": 0.5835528373718262, "learning_rate": 9.520190961879942e-05, "loss": 3.3013, "step": 2240 }, { "epoch": 0.18381757622341788, "grad_norm": 0.5812976360321045, "learning_rate": 9.519031344148741e-05, "loss": 3.2155, "step": 2242 }, { "epoch": 0.1839815526518063, "grad_norm": 0.6407437324523926, "learning_rate": 9.517870397606996e-05, "loss": 3.3021, "step": 2244 }, { "epoch": 0.18414552908019471, "grad_norm": 0.6286873817443848, "learning_rate": 9.516708122596079e-05, "loss": 3.2593, "step": 2246 }, { "epoch": 0.18430950550858313, "grad_norm": 0.6462428569793701, "learning_rate": 9.515544519457755e-05, "loss": 3.2773, "step": 2248 }, { "epoch": 0.18447348193697155, "grad_norm": 0.5487723350524902, "learning_rate": 9.51437958853418e-05, "loss": 3.2368, "step": 2250 }, { "epoch": 0.18463745836535997, "grad_norm": 0.6235312819480896, "learning_rate": 9.513213330167898e-05, "loss": 3.2401, "step": 2252 }, { "epoch": 0.1848014347937484, "grad_norm": 0.6422250866889954, "learning_rate": 9.512045744701843e-05, "loss": 3.252, "step": 2254 }, { "epoch": 0.1849654112221368, "grad_norm": 0.6642846465110779, "learning_rate": 9.510876832479343e-05, "loss": 3.2247, "step": 2256 }, { "epoch": 0.18512938765052522, "grad_norm": 0.6005398631095886, "learning_rate": 9.509706593844114e-05, "loss": 3.2238, "step": 2258 }, { "epoch": 0.18529336407891367, "grad_norm": 0.5651940107345581, "learning_rate": 9.508535029140262e-05, "loss": 3.2212, "step": 2260 }, { "epoch": 0.1854573405073021, "grad_norm": 0.6252986788749695, "learning_rate": 9.507362138712282e-05, "loss": 3.2603, "step": 2262 }, { "epoch": 0.1856213169356905, "grad_norm": 0.6485080718994141, "learning_rate": 9.506187922905057e-05, "loss": 3.2465, "step": 2264 }, { "epoch": 0.18578529336407892, "grad_norm": 0.5912032723426819, "learning_rate": 9.505012382063869e-05, "loss": 3.1827, "step": 2266 }, { "epoch": 0.18594926979246734, "grad_norm": 0.6242038607597351, "learning_rate": 9.503835516534376e-05, "loss": 3.2717, "step": 2268 }, { "epoch": 0.18611324622085576, "grad_norm": 0.6092646718025208, "learning_rate": 9.502657326662637e-05, "loss": 3.2605, "step": 2270 }, { "epoch": 0.18627722264924418, "grad_norm": 0.5553577542304993, "learning_rate": 9.501477812795094e-05, "loss": 3.2083, "step": 2272 }, { "epoch": 0.1864411990776326, "grad_norm": 0.5823682546615601, "learning_rate": 9.500296975278581e-05, "loss": 3.2408, "step": 2274 }, { "epoch": 0.18660517550602101, "grad_norm": 0.6024842858314514, "learning_rate": 9.499114814460323e-05, "loss": 3.2654, "step": 2276 }, { "epoch": 0.18676915193440943, "grad_norm": 0.6093845367431641, "learning_rate": 9.497931330687926e-05, "loss": 3.2261, "step": 2278 }, { "epoch": 0.18693312836279785, "grad_norm": 0.5323441624641418, "learning_rate": 9.496746524309396e-05, "loss": 3.286, "step": 2280 }, { "epoch": 0.18709710479118627, "grad_norm": 0.6634844541549683, "learning_rate": 9.495560395673119e-05, "loss": 3.2319, "step": 2282 }, { "epoch": 0.1872610812195747, "grad_norm": 0.5159028172492981, "learning_rate": 9.494372945127873e-05, "loss": 3.2609, "step": 2284 }, { "epoch": 0.1874250576479631, "grad_norm": 0.6331459283828735, "learning_rate": 9.493184173022829e-05, "loss": 3.2177, "step": 2286 }, { "epoch": 0.18758903407635152, "grad_norm": 0.7428011894226074, "learning_rate": 9.49199407970754e-05, "loss": 3.2783, "step": 2288 }, { "epoch": 0.18775301050473994, "grad_norm": 0.5949831604957581, "learning_rate": 9.49080266553195e-05, "loss": 3.2425, "step": 2290 }, { "epoch": 0.18791698693312836, "grad_norm": 0.7588968873023987, "learning_rate": 9.489609930846391e-05, "loss": 3.2672, "step": 2292 }, { "epoch": 0.18808096336151678, "grad_norm": 0.592046320438385, "learning_rate": 9.488415876001586e-05, "loss": 3.2341, "step": 2294 }, { "epoch": 0.1882449397899052, "grad_norm": 0.5699316263198853, "learning_rate": 9.487220501348642e-05, "loss": 3.2835, "step": 2296 }, { "epoch": 0.18840891621829362, "grad_norm": 0.634863555431366, "learning_rate": 9.486023807239057e-05, "loss": 3.2315, "step": 2298 }, { "epoch": 0.18857289264668203, "grad_norm": 0.5566238760948181, "learning_rate": 9.484825794024716e-05, "loss": 3.1947, "step": 2300 }, { "epoch": 0.18873686907507045, "grad_norm": 0.6761863231658936, "learning_rate": 9.483626462057893e-05, "loss": 3.2531, "step": 2302 }, { "epoch": 0.18890084550345887, "grad_norm": 0.561553955078125, "learning_rate": 9.482425811691247e-05, "loss": 3.1987, "step": 2304 }, { "epoch": 0.1890648219318473, "grad_norm": 0.7657498121261597, "learning_rate": 9.481223843277827e-05, "loss": 3.2818, "step": 2306 }, { "epoch": 0.1892287983602357, "grad_norm": 0.5893799066543579, "learning_rate": 9.480020557171068e-05, "loss": 3.2392, "step": 2308 }, { "epoch": 0.18939277478862412, "grad_norm": 0.6204321980476379, "learning_rate": 9.478815953724796e-05, "loss": 3.1719, "step": 2310 }, { "epoch": 0.18955675121701254, "grad_norm": 0.5662544965744019, "learning_rate": 9.47761003329322e-05, "loss": 3.1849, "step": 2312 }, { "epoch": 0.18972072764540096, "grad_norm": 0.5701155662536621, "learning_rate": 9.476402796230938e-05, "loss": 3.2727, "step": 2314 }, { "epoch": 0.1898847040737894, "grad_norm": 0.5828278064727783, "learning_rate": 9.475194242892936e-05, "loss": 3.2154, "step": 2316 }, { "epoch": 0.19004868050217782, "grad_norm": 0.6154817342758179, "learning_rate": 9.473984373634586e-05, "loss": 3.2404, "step": 2318 }, { "epoch": 0.19021265693056624, "grad_norm": 0.5927799940109253, "learning_rate": 9.472773188811647e-05, "loss": 3.2507, "step": 2320 }, { "epoch": 0.19037663335895466, "grad_norm": 0.5183115601539612, "learning_rate": 9.471560688780266e-05, "loss": 3.2129, "step": 2322 }, { "epoch": 0.19054060978734308, "grad_norm": 0.5379958748817444, "learning_rate": 9.470346873896974e-05, "loss": 3.2855, "step": 2324 }, { "epoch": 0.1907045862157315, "grad_norm": 0.5806155204772949, "learning_rate": 9.46913174451869e-05, "loss": 3.2789, "step": 2326 }, { "epoch": 0.19086856264411992, "grad_norm": 0.5110951066017151, "learning_rate": 9.467915301002723e-05, "loss": 3.1858, "step": 2328 }, { "epoch": 0.19103253907250833, "grad_norm": 0.6017136573791504, "learning_rate": 9.466697543706764e-05, "loss": 3.214, "step": 2330 }, { "epoch": 0.19119651550089675, "grad_norm": 0.620817244052887, "learning_rate": 9.46547847298889e-05, "loss": 3.2424, "step": 2332 }, { "epoch": 0.19136049192928517, "grad_norm": 0.5650565028190613, "learning_rate": 9.464258089207569e-05, "loss": 3.1888, "step": 2334 }, { "epoch": 0.1915244683576736, "grad_norm": 0.690185546875, "learning_rate": 9.46303639272165e-05, "loss": 3.2409, "step": 2336 }, { "epoch": 0.191688444786062, "grad_norm": 0.5948742032051086, "learning_rate": 9.461813383890367e-05, "loss": 3.2372, "step": 2338 }, { "epoch": 0.19185242121445042, "grad_norm": 0.6050938367843628, "learning_rate": 9.460589063073349e-05, "loss": 3.1971, "step": 2340 }, { "epoch": 0.19201639764283884, "grad_norm": 0.6243955492973328, "learning_rate": 9.459363430630601e-05, "loss": 3.2438, "step": 2342 }, { "epoch": 0.19218037407122726, "grad_norm": 0.6408066153526306, "learning_rate": 9.458136486922519e-05, "loss": 3.2632, "step": 2344 }, { "epoch": 0.19234435049961568, "grad_norm": 0.6085670590400696, "learning_rate": 9.45690823230988e-05, "loss": 3.2042, "step": 2346 }, { "epoch": 0.1925083269280041, "grad_norm": 0.6089074015617371, "learning_rate": 9.455678667153853e-05, "loss": 3.1707, "step": 2348 }, { "epoch": 0.19267230335639252, "grad_norm": 0.572694718837738, "learning_rate": 9.454447791815986e-05, "loss": 3.1984, "step": 2350 }, { "epoch": 0.19283627978478093, "grad_norm": 0.5600984692573547, "learning_rate": 9.453215606658217e-05, "loss": 3.2981, "step": 2352 }, { "epoch": 0.19300025621316935, "grad_norm": 0.6271937489509583, "learning_rate": 9.451982112042866e-05, "loss": 3.2026, "step": 2354 }, { "epoch": 0.19316423264155777, "grad_norm": 0.5863776803016663, "learning_rate": 9.450747308332639e-05, "loss": 3.1766, "step": 2356 }, { "epoch": 0.1933282090699462, "grad_norm": 0.545541524887085, "learning_rate": 9.449511195890628e-05, "loss": 3.2055, "step": 2358 }, { "epoch": 0.1934921854983346, "grad_norm": 0.5626051425933838, "learning_rate": 9.44827377508031e-05, "loss": 3.2633, "step": 2360 }, { "epoch": 0.19365616192672302, "grad_norm": 0.5464023351669312, "learning_rate": 9.44703504626554e-05, "loss": 3.2158, "step": 2362 }, { "epoch": 0.19382013835511144, "grad_norm": 0.57725989818573, "learning_rate": 9.445795009810572e-05, "loss": 3.2539, "step": 2364 }, { "epoch": 0.19398411478349986, "grad_norm": 0.5377549529075623, "learning_rate": 9.444553666080029e-05, "loss": 3.2038, "step": 2366 }, { "epoch": 0.19414809121188828, "grad_norm": 0.5789408087730408, "learning_rate": 9.443311015438927e-05, "loss": 3.2505, "step": 2368 }, { "epoch": 0.1943120676402767, "grad_norm": 0.5615429878234863, "learning_rate": 9.442067058252666e-05, "loss": 3.2348, "step": 2370 }, { "epoch": 0.19447604406866512, "grad_norm": 0.5808910131454468, "learning_rate": 9.440821794887028e-05, "loss": 3.2645, "step": 2372 }, { "epoch": 0.19464002049705356, "grad_norm": 0.5342815518379211, "learning_rate": 9.439575225708179e-05, "loss": 3.1556, "step": 2374 }, { "epoch": 0.19480399692544198, "grad_norm": 0.6104359030723572, "learning_rate": 9.438327351082669e-05, "loss": 3.1896, "step": 2376 }, { "epoch": 0.1949679733538304, "grad_norm": 0.47551843523979187, "learning_rate": 9.437078171377437e-05, "loss": 3.236, "step": 2378 }, { "epoch": 0.19513194978221882, "grad_norm": 0.5371410846710205, "learning_rate": 9.435827686959795e-05, "loss": 3.2451, "step": 2380 }, { "epoch": 0.19529592621060723, "grad_norm": 0.5497537851333618, "learning_rate": 9.43457589819745e-05, "loss": 3.1947, "step": 2382 }, { "epoch": 0.19545990263899565, "grad_norm": 0.6186292767524719, "learning_rate": 9.433322805458484e-05, "loss": 3.242, "step": 2384 }, { "epoch": 0.19562387906738407, "grad_norm": 0.6111587882041931, "learning_rate": 9.43206840911137e-05, "loss": 3.2098, "step": 2386 }, { "epoch": 0.1957878554957725, "grad_norm": 0.636885941028595, "learning_rate": 9.430812709524956e-05, "loss": 3.2636, "step": 2388 }, { "epoch": 0.1959518319241609, "grad_norm": 0.5833829045295715, "learning_rate": 9.42955570706848e-05, "loss": 3.147, "step": 2390 }, { "epoch": 0.19611580835254933, "grad_norm": 0.645831823348999, "learning_rate": 9.42829740211156e-05, "loss": 3.2301, "step": 2392 }, { "epoch": 0.19627978478093774, "grad_norm": 0.5893756151199341, "learning_rate": 9.427037795024199e-05, "loss": 3.2677, "step": 2394 }, { "epoch": 0.19644376120932616, "grad_norm": 0.5982114672660828, "learning_rate": 9.425776886176778e-05, "loss": 3.1879, "step": 2396 }, { "epoch": 0.19660773763771458, "grad_norm": 0.6196883916854858, "learning_rate": 9.424514675940068e-05, "loss": 3.1708, "step": 2398 }, { "epoch": 0.196771714066103, "grad_norm": 0.5512893199920654, "learning_rate": 9.423251164685217e-05, "loss": 3.1997, "step": 2400 }, { "epoch": 0.19693569049449142, "grad_norm": 0.6537207961082458, "learning_rate": 9.421986352783759e-05, "loss": 3.2384, "step": 2402 }, { "epoch": 0.19709966692287983, "grad_norm": 0.572920024394989, "learning_rate": 9.420720240607606e-05, "loss": 3.1938, "step": 2404 }, { "epoch": 0.19726364335126825, "grad_norm": 0.5719939470291138, "learning_rate": 9.419452828529058e-05, "loss": 3.2079, "step": 2406 }, { "epoch": 0.19742761977965667, "grad_norm": 0.5642483234405518, "learning_rate": 9.418184116920794e-05, "loss": 3.2037, "step": 2408 }, { "epoch": 0.1975915962080451, "grad_norm": 0.4986971914768219, "learning_rate": 9.416914106155875e-05, "loss": 3.1913, "step": 2410 }, { "epoch": 0.1977555726364335, "grad_norm": 0.5548354983329773, "learning_rate": 9.415642796607746e-05, "loss": 3.2272, "step": 2412 }, { "epoch": 0.19791954906482193, "grad_norm": 0.6837654113769531, "learning_rate": 9.414370188650231e-05, "loss": 3.2308, "step": 2414 }, { "epoch": 0.19808352549321034, "grad_norm": 0.7141901850700378, "learning_rate": 9.413096282657538e-05, "loss": 3.1775, "step": 2416 }, { "epoch": 0.19824750192159876, "grad_norm": 0.7015743851661682, "learning_rate": 9.411821079004258e-05, "loss": 3.1921, "step": 2418 }, { "epoch": 0.19841147834998718, "grad_norm": 0.5208891034126282, "learning_rate": 9.410544578065358e-05, "loss": 3.1988, "step": 2420 }, { "epoch": 0.1985754547783756, "grad_norm": 0.6202064752578735, "learning_rate": 9.409266780216191e-05, "loss": 3.1672, "step": 2422 }, { "epoch": 0.19873943120676402, "grad_norm": 0.670427143573761, "learning_rate": 9.407987685832493e-05, "loss": 3.2011, "step": 2424 }, { "epoch": 0.19890340763515243, "grad_norm": 0.6327905058860779, "learning_rate": 9.406707295290377e-05, "loss": 3.216, "step": 2426 }, { "epoch": 0.19906738406354085, "grad_norm": 0.6474474668502808, "learning_rate": 9.405425608966338e-05, "loss": 3.2229, "step": 2428 }, { "epoch": 0.19923136049192927, "grad_norm": 0.7056578993797302, "learning_rate": 9.404142627237255e-05, "loss": 3.2508, "step": 2430 }, { "epoch": 0.19939533692031772, "grad_norm": 0.5632738471031189, "learning_rate": 9.402858350480383e-05, "loss": 3.1891, "step": 2432 }, { "epoch": 0.19955931334870614, "grad_norm": 0.5663987994194031, "learning_rate": 9.401572779073363e-05, "loss": 3.1317, "step": 2434 }, { "epoch": 0.19972328977709455, "grad_norm": 0.6044129729270935, "learning_rate": 9.400285913394213e-05, "loss": 3.1676, "step": 2436 }, { "epoch": 0.19988726620548297, "grad_norm": 0.5997135639190674, "learning_rate": 9.398997753821334e-05, "loss": 3.2122, "step": 2438 }, { "epoch": 0.2000512426338714, "grad_norm": 0.5731354355812073, "learning_rate": 9.397708300733503e-05, "loss": 3.146, "step": 2440 }, { "epoch": 0.2002152190622598, "grad_norm": 0.5749174356460571, "learning_rate": 9.396417554509885e-05, "loss": 3.2533, "step": 2442 }, { "epoch": 0.20037919549064823, "grad_norm": 0.6123077869415283, "learning_rate": 9.395125515530019e-05, "loss": 3.2046, "step": 2444 }, { "epoch": 0.20054317191903664, "grad_norm": 0.5596455335617065, "learning_rate": 9.393832184173826e-05, "loss": 3.2109, "step": 2446 }, { "epoch": 0.20070714834742506, "grad_norm": 0.6029247641563416, "learning_rate": 9.392537560821606e-05, "loss": 3.2487, "step": 2448 }, { "epoch": 0.20087112477581348, "grad_norm": 0.5740619897842407, "learning_rate": 9.391241645854041e-05, "loss": 3.1954, "step": 2450 }, { "epoch": 0.2010351012042019, "grad_norm": 0.4807377755641937, "learning_rate": 9.389944439652194e-05, "loss": 3.232, "step": 2452 }, { "epoch": 0.20119907763259032, "grad_norm": 0.4954237937927246, "learning_rate": 9.388645942597501e-05, "loss": 3.1604, "step": 2454 }, { "epoch": 0.20136305406097874, "grad_norm": 0.5956186652183533, "learning_rate": 9.387346155071785e-05, "loss": 3.2222, "step": 2456 }, { "epoch": 0.20152703048936715, "grad_norm": 0.5427073240280151, "learning_rate": 9.386045077457244e-05, "loss": 3.199, "step": 2458 }, { "epoch": 0.20169100691775557, "grad_norm": 0.6691755056381226, "learning_rate": 9.384742710136458e-05, "loss": 3.1441, "step": 2460 }, { "epoch": 0.201854983346144, "grad_norm": 0.5961546301841736, "learning_rate": 9.383439053492384e-05, "loss": 3.2128, "step": 2462 }, { "epoch": 0.2020189597745324, "grad_norm": 0.627190887928009, "learning_rate": 9.38213410790836e-05, "loss": 3.2204, "step": 2464 }, { "epoch": 0.20218293620292083, "grad_norm": 0.6342671513557434, "learning_rate": 9.380827873768101e-05, "loss": 3.1279, "step": 2466 }, { "epoch": 0.20234691263130924, "grad_norm": 0.5942181944847107, "learning_rate": 9.379520351455705e-05, "loss": 3.204, "step": 2468 }, { "epoch": 0.20251088905969766, "grad_norm": 0.5538034439086914, "learning_rate": 9.378211541355643e-05, "loss": 3.2224, "step": 2470 }, { "epoch": 0.20267486548808608, "grad_norm": 0.5361983776092529, "learning_rate": 9.376901443852767e-05, "loss": 3.1103, "step": 2472 }, { "epoch": 0.2028388419164745, "grad_norm": 0.6236636638641357, "learning_rate": 9.375590059332311e-05, "loss": 3.2047, "step": 2474 }, { "epoch": 0.20300281834486292, "grad_norm": 0.5351163148880005, "learning_rate": 9.374277388179882e-05, "loss": 3.2153, "step": 2476 }, { "epoch": 0.20316679477325134, "grad_norm": 0.5665524005889893, "learning_rate": 9.37296343078147e-05, "loss": 3.1779, "step": 2478 }, { "epoch": 0.20333077120163975, "grad_norm": 0.5840953588485718, "learning_rate": 9.371648187523439e-05, "loss": 3.1732, "step": 2480 }, { "epoch": 0.20349474763002817, "grad_norm": 0.5002064108848572, "learning_rate": 9.370331658792534e-05, "loss": 3.1473, "step": 2482 }, { "epoch": 0.2036587240584166, "grad_norm": 0.5152116417884827, "learning_rate": 9.369013844975878e-05, "loss": 3.1973, "step": 2484 }, { "epoch": 0.203822700486805, "grad_norm": 0.5382422208786011, "learning_rate": 9.367694746460969e-05, "loss": 3.1354, "step": 2486 }, { "epoch": 0.20398667691519345, "grad_norm": 0.5114589929580688, "learning_rate": 9.366374363635688e-05, "loss": 3.1416, "step": 2488 }, { "epoch": 0.20415065334358187, "grad_norm": 0.5633603930473328, "learning_rate": 9.365052696888288e-05, "loss": 3.1328, "step": 2490 }, { "epoch": 0.2043146297719703, "grad_norm": 0.6122515201568604, "learning_rate": 9.363729746607401e-05, "loss": 3.2152, "step": 2492 }, { "epoch": 0.2044786062003587, "grad_norm": 0.49608293175697327, "learning_rate": 9.36240551318204e-05, "loss": 3.1354, "step": 2494 }, { "epoch": 0.20464258262874713, "grad_norm": 0.6546223759651184, "learning_rate": 9.361079997001592e-05, "loss": 3.2126, "step": 2496 }, { "epoch": 0.20480655905713555, "grad_norm": 0.6353023648262024, "learning_rate": 9.359753198455823e-05, "loss": 3.1782, "step": 2498 }, { "epoch": 0.20497053548552396, "grad_norm": 0.5790070295333862, "learning_rate": 9.358425117934873e-05, "loss": 3.1635, "step": 2500 }, { "epoch": 0.20513451191391238, "grad_norm": 0.5316998958587646, "learning_rate": 9.357095755829259e-05, "loss": 3.1684, "step": 2502 }, { "epoch": 0.2052984883423008, "grad_norm": 0.5418469905853271, "learning_rate": 9.355765112529882e-05, "loss": 3.2322, "step": 2504 }, { "epoch": 0.20546246477068922, "grad_norm": 0.5909755229949951, "learning_rate": 9.35443318842801e-05, "loss": 3.1637, "step": 2506 }, { "epoch": 0.20562644119907764, "grad_norm": 0.5913002490997314, "learning_rate": 9.353099983915298e-05, "loss": 3.1568, "step": 2508 }, { "epoch": 0.20579041762746605, "grad_norm": 0.5578615665435791, "learning_rate": 9.351765499383764e-05, "loss": 3.1382, "step": 2510 }, { "epoch": 0.20595439405585447, "grad_norm": 0.5887861251831055, "learning_rate": 9.350429735225816e-05, "loss": 3.1946, "step": 2512 }, { "epoch": 0.2061183704842429, "grad_norm": 0.5453567504882812, "learning_rate": 9.34909269183423e-05, "loss": 3.1474, "step": 2514 }, { "epoch": 0.2062823469126313, "grad_norm": 0.5504783987998962, "learning_rate": 9.34775436960216e-05, "loss": 3.1371, "step": 2516 }, { "epoch": 0.20644632334101973, "grad_norm": 0.5522333979606628, "learning_rate": 9.346414768923138e-05, "loss": 3.1736, "step": 2518 }, { "epoch": 0.20661029976940815, "grad_norm": 0.5545846223831177, "learning_rate": 9.345073890191067e-05, "loss": 3.155, "step": 2520 }, { "epoch": 0.20677427619779656, "grad_norm": 0.5755533576011658, "learning_rate": 9.343731733800235e-05, "loss": 3.1211, "step": 2522 }, { "epoch": 0.20693825262618498, "grad_norm": 0.6164469122886658, "learning_rate": 9.342388300145294e-05, "loss": 3.1781, "step": 2524 }, { "epoch": 0.2071022290545734, "grad_norm": 0.5937029123306274, "learning_rate": 9.341043589621282e-05, "loss": 3.2039, "step": 2526 }, { "epoch": 0.20726620548296182, "grad_norm": 0.5489475727081299, "learning_rate": 9.339697602623605e-05, "loss": 3.1502, "step": 2528 }, { "epoch": 0.20743018191135024, "grad_norm": 0.6091250777244568, "learning_rate": 9.338350339548048e-05, "loss": 3.1774, "step": 2530 }, { "epoch": 0.20759415833973865, "grad_norm": 0.5674654841423035, "learning_rate": 9.337001800790773e-05, "loss": 3.1535, "step": 2532 }, { "epoch": 0.20775813476812707, "grad_norm": 0.644279420375824, "learning_rate": 9.33565198674831e-05, "loss": 3.1406, "step": 2534 }, { "epoch": 0.2079221111965155, "grad_norm": 0.6195595264434814, "learning_rate": 9.334300897817574e-05, "loss": 3.1527, "step": 2536 }, { "epoch": 0.2080860876249039, "grad_norm": 0.5304683446884155, "learning_rate": 9.332948534395846e-05, "loss": 3.1957, "step": 2538 }, { "epoch": 0.20825006405329233, "grad_norm": 0.6691213250160217, "learning_rate": 9.331594896880787e-05, "loss": 3.2468, "step": 2540 }, { "epoch": 0.20841404048168075, "grad_norm": 0.5579569339752197, "learning_rate": 9.330239985670427e-05, "loss": 3.1475, "step": 2542 }, { "epoch": 0.20857801691006916, "grad_norm": 0.6016284227371216, "learning_rate": 9.328883801163181e-05, "loss": 3.1298, "step": 2544 }, { "epoch": 0.2087419933384576, "grad_norm": 0.5903862714767456, "learning_rate": 9.327526343757826e-05, "loss": 3.1804, "step": 2546 }, { "epoch": 0.20890596976684603, "grad_norm": 0.5137822031974792, "learning_rate": 9.326167613853523e-05, "loss": 3.1662, "step": 2548 }, { "epoch": 0.20906994619523445, "grad_norm": 0.5315471887588501, "learning_rate": 9.324807611849802e-05, "loss": 3.2222, "step": 2550 }, { "epoch": 0.20923392262362286, "grad_norm": 0.5678295493125916, "learning_rate": 9.323446338146568e-05, "loss": 3.147, "step": 2552 }, { "epoch": 0.20939789905201128, "grad_norm": 0.5671442151069641, "learning_rate": 9.322083793144101e-05, "loss": 3.1514, "step": 2554 }, { "epoch": 0.2095618754803997, "grad_norm": 0.5480635166168213, "learning_rate": 9.320719977243052e-05, "loss": 3.1943, "step": 2556 }, { "epoch": 0.20972585190878812, "grad_norm": 0.573996901512146, "learning_rate": 9.319354890844451e-05, "loss": 3.1084, "step": 2558 }, { "epoch": 0.20988982833717654, "grad_norm": 0.5476592183113098, "learning_rate": 9.317988534349697e-05, "loss": 3.1328, "step": 2560 }, { "epoch": 0.21005380476556496, "grad_norm": 0.5603650808334351, "learning_rate": 9.316620908160562e-05, "loss": 3.256, "step": 2562 }, { "epoch": 0.21021778119395337, "grad_norm": 0.5470094680786133, "learning_rate": 9.315252012679198e-05, "loss": 3.2453, "step": 2564 }, { "epoch": 0.2103817576223418, "grad_norm": 0.5147728323936462, "learning_rate": 9.313881848308123e-05, "loss": 3.2067, "step": 2566 }, { "epoch": 0.2105457340507302, "grad_norm": 0.5771604776382446, "learning_rate": 9.312510415450228e-05, "loss": 3.1415, "step": 2568 }, { "epoch": 0.21070971047911863, "grad_norm": 0.5814144015312195, "learning_rate": 9.311137714508785e-05, "loss": 3.1481, "step": 2570 }, { "epoch": 0.21087368690750705, "grad_norm": 0.589153528213501, "learning_rate": 9.309763745887428e-05, "loss": 3.1297, "step": 2572 }, { "epoch": 0.21103766333589546, "grad_norm": 0.5519060492515564, "learning_rate": 9.308388509990171e-05, "loss": 3.1409, "step": 2574 }, { "epoch": 0.21120163976428388, "grad_norm": 0.5374418497085571, "learning_rate": 9.307012007221401e-05, "loss": 3.2072, "step": 2576 }, { "epoch": 0.2113656161926723, "grad_norm": 0.5539153218269348, "learning_rate": 9.305634237985874e-05, "loss": 3.1928, "step": 2578 }, { "epoch": 0.21152959262106072, "grad_norm": 0.47456660866737366, "learning_rate": 9.304255202688721e-05, "loss": 3.1669, "step": 2580 }, { "epoch": 0.21169356904944914, "grad_norm": 0.5018851161003113, "learning_rate": 9.302874901735441e-05, "loss": 3.1694, "step": 2582 }, { "epoch": 0.21185754547783756, "grad_norm": 0.5462913513183594, "learning_rate": 9.301493335531911e-05, "loss": 3.1753, "step": 2584 }, { "epoch": 0.21202152190622597, "grad_norm": 0.5394952297210693, "learning_rate": 9.300110504484377e-05, "loss": 3.1824, "step": 2586 }, { "epoch": 0.2121854983346144, "grad_norm": 0.6784063577651978, "learning_rate": 9.298726408999455e-05, "loss": 3.1717, "step": 2588 }, { "epoch": 0.2123494747630028, "grad_norm": 0.5093061923980713, "learning_rate": 9.297341049484139e-05, "loss": 3.1197, "step": 2590 }, { "epoch": 0.21251345119139123, "grad_norm": 0.5276237726211548, "learning_rate": 9.295954426345786e-05, "loss": 3.1307, "step": 2592 }, { "epoch": 0.21267742761977965, "grad_norm": 0.6057010293006897, "learning_rate": 9.294566539992132e-05, "loss": 3.1619, "step": 2594 }, { "epoch": 0.21284140404816806, "grad_norm": 0.6017722487449646, "learning_rate": 9.293177390831282e-05, "loss": 3.1196, "step": 2596 }, { "epoch": 0.21300538047655648, "grad_norm": 0.5458320379257202, "learning_rate": 9.291786979271712e-05, "loss": 3.1665, "step": 2598 }, { "epoch": 0.2131693569049449, "grad_norm": 0.6224083304405212, "learning_rate": 9.290395305722269e-05, "loss": 3.1768, "step": 2600 }, { "epoch": 0.21333333333333335, "grad_norm": 0.5466166138648987, "learning_rate": 9.289002370592168e-05, "loss": 3.135, "step": 2602 }, { "epoch": 0.21349730976172177, "grad_norm": 0.6690223217010498, "learning_rate": 9.287608174291004e-05, "loss": 3.2039, "step": 2604 }, { "epoch": 0.21366128619011018, "grad_norm": 0.6225460767745972, "learning_rate": 9.286212717228734e-05, "loss": 3.1377, "step": 2606 }, { "epoch": 0.2138252626184986, "grad_norm": 0.6038724184036255, "learning_rate": 9.28481599981569e-05, "loss": 3.1148, "step": 2608 }, { "epoch": 0.21398923904688702, "grad_norm": 0.5335320830345154, "learning_rate": 9.283418022462571e-05, "loss": 3.1726, "step": 2610 }, { "epoch": 0.21415321547527544, "grad_norm": 0.5151216387748718, "learning_rate": 9.282018785580452e-05, "loss": 3.1839, "step": 2612 }, { "epoch": 0.21431719190366386, "grad_norm": 0.5432109236717224, "learning_rate": 9.280618289580773e-05, "loss": 3.1388, "step": 2614 }, { "epoch": 0.21448116833205227, "grad_norm": 0.4836788475513458, "learning_rate": 9.27921653487535e-05, "loss": 3.133, "step": 2616 }, { "epoch": 0.2146451447604407, "grad_norm": 0.5969836115837097, "learning_rate": 9.277813521876361e-05, "loss": 3.1967, "step": 2618 }, { "epoch": 0.2148091211888291, "grad_norm": 0.5812448859214783, "learning_rate": 9.276409250996362e-05, "loss": 3.0804, "step": 2620 }, { "epoch": 0.21497309761721753, "grad_norm": 0.6391003131866455, "learning_rate": 9.275003722648274e-05, "loss": 3.1616, "step": 2622 }, { "epoch": 0.21513707404560595, "grad_norm": 0.6133304834365845, "learning_rate": 9.27359693724539e-05, "loss": 3.1099, "step": 2624 }, { "epoch": 0.21530105047399437, "grad_norm": 0.7073734402656555, "learning_rate": 9.272188895201372e-05, "loss": 3.2123, "step": 2626 }, { "epoch": 0.21546502690238278, "grad_norm": 0.6187078952789307, "learning_rate": 9.270779596930252e-05, "loss": 3.1732, "step": 2628 }, { "epoch": 0.2156290033307712, "grad_norm": 0.5692609548568726, "learning_rate": 9.269369042846428e-05, "loss": 3.1112, "step": 2630 }, { "epoch": 0.21579297975915962, "grad_norm": 0.6214010715484619, "learning_rate": 9.267957233364674e-05, "loss": 3.1889, "step": 2632 }, { "epoch": 0.21595695618754804, "grad_norm": 0.575520932674408, "learning_rate": 9.266544168900126e-05, "loss": 3.1127, "step": 2634 }, { "epoch": 0.21612093261593646, "grad_norm": 0.5261242985725403, "learning_rate": 9.265129849868294e-05, "loss": 3.1123, "step": 2636 }, { "epoch": 0.21628490904432487, "grad_norm": 0.5849714279174805, "learning_rate": 9.263714276685056e-05, "loss": 3.1153, "step": 2638 }, { "epoch": 0.2164488854727133, "grad_norm": 0.563801109790802, "learning_rate": 9.262297449766657e-05, "loss": 3.1947, "step": 2640 }, { "epoch": 0.2166128619011017, "grad_norm": 0.5794183611869812, "learning_rate": 9.260879369529711e-05, "loss": 3.1205, "step": 2642 }, { "epoch": 0.21677683832949013, "grad_norm": 0.6427027583122253, "learning_rate": 9.259460036391201e-05, "loss": 3.1642, "step": 2644 }, { "epoch": 0.21694081475787855, "grad_norm": 0.5406637191772461, "learning_rate": 9.25803945076848e-05, "loss": 3.1695, "step": 2646 }, { "epoch": 0.21710479118626697, "grad_norm": 0.5383151769638062, "learning_rate": 9.256617613079267e-05, "loss": 3.0954, "step": 2648 }, { "epoch": 0.21726876761465538, "grad_norm": 0.6771288514137268, "learning_rate": 9.25519452374165e-05, "loss": 3.1934, "step": 2650 }, { "epoch": 0.2174327440430438, "grad_norm": 0.639716386795044, "learning_rate": 9.253770183174085e-05, "loss": 3.1192, "step": 2652 }, { "epoch": 0.21759672047143222, "grad_norm": 0.5584697127342224, "learning_rate": 9.252344591795396e-05, "loss": 3.2127, "step": 2654 }, { "epoch": 0.21776069689982064, "grad_norm": 0.5381549000740051, "learning_rate": 9.250917750024777e-05, "loss": 3.1321, "step": 2656 }, { "epoch": 0.21792467332820906, "grad_norm": 0.5931708216667175, "learning_rate": 9.249489658281783e-05, "loss": 3.1762, "step": 2658 }, { "epoch": 0.2180886497565975, "grad_norm": 0.5969710946083069, "learning_rate": 9.248060316986344e-05, "loss": 3.1158, "step": 2660 }, { "epoch": 0.21825262618498592, "grad_norm": 0.6363986134529114, "learning_rate": 9.246629726558756e-05, "loss": 3.242, "step": 2662 }, { "epoch": 0.21841660261337434, "grad_norm": 0.540717363357544, "learning_rate": 9.245197887419676e-05, "loss": 3.1661, "step": 2664 }, { "epoch": 0.21858057904176276, "grad_norm": 0.589412271976471, "learning_rate": 9.243764799990136e-05, "loss": 3.1018, "step": 2666 }, { "epoch": 0.21874455547015118, "grad_norm": 0.5175191164016724, "learning_rate": 9.242330464691533e-05, "loss": 3.1773, "step": 2668 }, { "epoch": 0.2189085318985396, "grad_norm": 0.5175068974494934, "learning_rate": 9.240894881945627e-05, "loss": 3.1566, "step": 2670 }, { "epoch": 0.219072508326928, "grad_norm": 0.48125573992729187, "learning_rate": 9.239458052174551e-05, "loss": 3.106, "step": 2672 }, { "epoch": 0.21923648475531643, "grad_norm": 0.6103034019470215, "learning_rate": 9.238019975800799e-05, "loss": 3.1316, "step": 2674 }, { "epoch": 0.21940046118370485, "grad_norm": 0.5315214991569519, "learning_rate": 9.236580653247235e-05, "loss": 3.0961, "step": 2676 }, { "epoch": 0.21956443761209327, "grad_norm": 0.5600281357765198, "learning_rate": 9.235140084937086e-05, "loss": 3.0966, "step": 2678 }, { "epoch": 0.21972841404048168, "grad_norm": 0.5466108322143555, "learning_rate": 9.233698271293953e-05, "loss": 3.1325, "step": 2680 }, { "epoch": 0.2198923904688701, "grad_norm": 0.5320989489555359, "learning_rate": 9.232255212741792e-05, "loss": 3.1617, "step": 2682 }, { "epoch": 0.22005636689725852, "grad_norm": 0.4955200254917145, "learning_rate": 9.230810909704934e-05, "loss": 3.1351, "step": 2684 }, { "epoch": 0.22022034332564694, "grad_norm": 0.5174024105072021, "learning_rate": 9.229365362608074e-05, "loss": 3.1993, "step": 2686 }, { "epoch": 0.22038431975403536, "grad_norm": 0.576806366443634, "learning_rate": 9.22791857187627e-05, "loss": 3.1945, "step": 2688 }, { "epoch": 0.22054829618242378, "grad_norm": 0.6114248633384705, "learning_rate": 9.226470537934948e-05, "loss": 3.1272, "step": 2690 }, { "epoch": 0.2207122726108122, "grad_norm": 0.6304234266281128, "learning_rate": 9.225021261209898e-05, "loss": 3.1276, "step": 2692 }, { "epoch": 0.2208762490392006, "grad_norm": 0.5603763461112976, "learning_rate": 9.223570742127278e-05, "loss": 3.068, "step": 2694 }, { "epoch": 0.22104022546758903, "grad_norm": 0.5506424307823181, "learning_rate": 9.222118981113607e-05, "loss": 3.1429, "step": 2696 }, { "epoch": 0.22120420189597745, "grad_norm": 0.5758050084114075, "learning_rate": 9.220665978595775e-05, "loss": 3.137, "step": 2698 }, { "epoch": 0.22136817832436587, "grad_norm": 0.5011979341506958, "learning_rate": 9.219211735001034e-05, "loss": 3.1048, "step": 2700 }, { "epoch": 0.22153215475275428, "grad_norm": 0.65125972032547, "learning_rate": 9.217756250756996e-05, "loss": 3.1607, "step": 2702 }, { "epoch": 0.2216961311811427, "grad_norm": 0.5995632410049438, "learning_rate": 9.21629952629165e-05, "loss": 3.055, "step": 2704 }, { "epoch": 0.22186010760953112, "grad_norm": 0.7397205233573914, "learning_rate": 9.214841562033338e-05, "loss": 3.1355, "step": 2706 }, { "epoch": 0.22202408403791954, "grad_norm": 0.636867880821228, "learning_rate": 9.213382358410771e-05, "loss": 3.158, "step": 2708 }, { "epoch": 0.22218806046630796, "grad_norm": 0.6258612275123596, "learning_rate": 9.211921915853026e-05, "loss": 3.1435, "step": 2710 }, { "epoch": 0.22235203689469638, "grad_norm": 0.6227878332138062, "learning_rate": 9.210460234789542e-05, "loss": 3.115, "step": 2712 }, { "epoch": 0.2225160133230848, "grad_norm": 0.552367091178894, "learning_rate": 9.20899731565012e-05, "loss": 3.1096, "step": 2714 }, { "epoch": 0.22267998975147324, "grad_norm": 0.5357968211174011, "learning_rate": 9.207533158864934e-05, "loss": 3.1431, "step": 2716 }, { "epoch": 0.22284396617986166, "grad_norm": 0.5799859166145325, "learning_rate": 9.206067764864512e-05, "loss": 3.1239, "step": 2718 }, { "epoch": 0.22300794260825008, "grad_norm": 0.541117787361145, "learning_rate": 9.204601134079749e-05, "loss": 3.1551, "step": 2720 }, { "epoch": 0.2231719190366385, "grad_norm": 0.5454208254814148, "learning_rate": 9.203133266941906e-05, "loss": 3.1585, "step": 2722 }, { "epoch": 0.2233358954650269, "grad_norm": 0.6066946983337402, "learning_rate": 9.201664163882605e-05, "loss": 3.1415, "step": 2724 }, { "epoch": 0.22349987189341533, "grad_norm": 0.5827730298042297, "learning_rate": 9.200193825333833e-05, "loss": 3.0677, "step": 2726 }, { "epoch": 0.22366384832180375, "grad_norm": 0.6678103804588318, "learning_rate": 9.198722251727941e-05, "loss": 3.1344, "step": 2728 }, { "epoch": 0.22382782475019217, "grad_norm": 0.553477942943573, "learning_rate": 9.197249443497638e-05, "loss": 3.1403, "step": 2730 }, { "epoch": 0.22399180117858059, "grad_norm": 0.6305515766143799, "learning_rate": 9.195775401076001e-05, "loss": 3.1172, "step": 2732 }, { "epoch": 0.224155777606969, "grad_norm": 0.6065593957901001, "learning_rate": 9.194300124896471e-05, "loss": 3.1931, "step": 2734 }, { "epoch": 0.22431975403535742, "grad_norm": 0.5823774933815002, "learning_rate": 9.192823615392848e-05, "loss": 3.1133, "step": 2736 }, { "epoch": 0.22448373046374584, "grad_norm": 0.5522893667221069, "learning_rate": 9.191345872999297e-05, "loss": 3.111, "step": 2738 }, { "epoch": 0.22464770689213426, "grad_norm": 0.5573318600654602, "learning_rate": 9.189866898150343e-05, "loss": 3.1446, "step": 2740 }, { "epoch": 0.22481168332052268, "grad_norm": 0.5337832570075989, "learning_rate": 9.188386691280875e-05, "loss": 3.1285, "step": 2742 }, { "epoch": 0.2249756597489111, "grad_norm": 0.4911380112171173, "learning_rate": 9.186905252826147e-05, "loss": 3.0945, "step": 2744 }, { "epoch": 0.2251396361772995, "grad_norm": 0.546593189239502, "learning_rate": 9.18542258322177e-05, "loss": 3.1581, "step": 2746 }, { "epoch": 0.22530361260568793, "grad_norm": 0.5859017372131348, "learning_rate": 9.183938682903721e-05, "loss": 3.1265, "step": 2748 }, { "epoch": 0.22546758903407635, "grad_norm": 0.610236406326294, "learning_rate": 9.182453552308335e-05, "loss": 3.1202, "step": 2750 }, { "epoch": 0.22563156546246477, "grad_norm": 0.521246075630188, "learning_rate": 9.180967191872315e-05, "loss": 3.1225, "step": 2752 }, { "epoch": 0.22579554189085319, "grad_norm": 0.5413455367088318, "learning_rate": 9.179479602032719e-05, "loss": 3.1589, "step": 2754 }, { "epoch": 0.2259595183192416, "grad_norm": 0.5279058814048767, "learning_rate": 9.177990783226969e-05, "loss": 3.1119, "step": 2756 }, { "epoch": 0.22612349474763002, "grad_norm": 0.5241829752922058, "learning_rate": 9.17650073589285e-05, "loss": 3.0924, "step": 2758 }, { "epoch": 0.22628747117601844, "grad_norm": 0.5258579850196838, "learning_rate": 9.175009460468507e-05, "loss": 3.1179, "step": 2760 }, { "epoch": 0.22645144760440686, "grad_norm": 0.5945084095001221, "learning_rate": 9.173516957392446e-05, "loss": 3.1142, "step": 2762 }, { "epoch": 0.22661542403279528, "grad_norm": 0.49606871604919434, "learning_rate": 9.172023227103533e-05, "loss": 3.1269, "step": 2764 }, { "epoch": 0.2267794004611837, "grad_norm": 0.5690228343009949, "learning_rate": 9.170528270040996e-05, "loss": 3.1252, "step": 2766 }, { "epoch": 0.2269433768895721, "grad_norm": 0.5027005672454834, "learning_rate": 9.169032086644425e-05, "loss": 3.1549, "step": 2768 }, { "epoch": 0.22710735331796053, "grad_norm": 0.5609970092773438, "learning_rate": 9.167534677353768e-05, "loss": 3.1085, "step": 2770 }, { "epoch": 0.22727132974634895, "grad_norm": 0.5917537212371826, "learning_rate": 9.166036042609336e-05, "loss": 3.1457, "step": 2772 }, { "epoch": 0.2274353061747374, "grad_norm": 0.4679684042930603, "learning_rate": 9.164536182851797e-05, "loss": 3.1672, "step": 2774 }, { "epoch": 0.2275992826031258, "grad_norm": 0.5247348546981812, "learning_rate": 9.163035098522182e-05, "loss": 3.0683, "step": 2776 }, { "epoch": 0.22776325903151423, "grad_norm": 0.5608956813812256, "learning_rate": 9.161532790061882e-05, "loss": 3.093, "step": 2778 }, { "epoch": 0.22792723545990265, "grad_norm": 0.6004567742347717, "learning_rate": 9.160029257912646e-05, "loss": 3.0853, "step": 2780 }, { "epoch": 0.22809121188829107, "grad_norm": 0.4759785532951355, "learning_rate": 9.158524502516586e-05, "loss": 3.1141, "step": 2782 }, { "epoch": 0.22825518831667949, "grad_norm": 0.5630537867546082, "learning_rate": 9.15701852431617e-05, "loss": 3.112, "step": 2784 }, { "epoch": 0.2284191647450679, "grad_norm": 0.5082862377166748, "learning_rate": 9.15551132375423e-05, "loss": 3.0943, "step": 2786 }, { "epoch": 0.22858314117345632, "grad_norm": 0.5193647146224976, "learning_rate": 9.15400290127395e-05, "loss": 3.1019, "step": 2788 }, { "epoch": 0.22874711760184474, "grad_norm": 0.604705274105072, "learning_rate": 9.152493257318882e-05, "loss": 3.0763, "step": 2790 }, { "epoch": 0.22891109403023316, "grad_norm": 0.6304996013641357, "learning_rate": 9.150982392332932e-05, "loss": 3.1561, "step": 2792 }, { "epoch": 0.22907507045862158, "grad_norm": 0.6620004177093506, "learning_rate": 9.149470306760368e-05, "loss": 3.1256, "step": 2794 }, { "epoch": 0.22923904688701, "grad_norm": 0.5921337604522705, "learning_rate": 9.147957001045813e-05, "loss": 3.14, "step": 2796 }, { "epoch": 0.2294030233153984, "grad_norm": 0.5974056720733643, "learning_rate": 9.146442475634252e-05, "loss": 3.1025, "step": 2798 }, { "epoch": 0.22956699974378683, "grad_norm": 0.5777150988578796, "learning_rate": 9.144926730971027e-05, "loss": 3.0568, "step": 2800 }, { "epoch": 0.22973097617217525, "grad_norm": 0.5552829504013062, "learning_rate": 9.143409767501839e-05, "loss": 3.1045, "step": 2802 }, { "epoch": 0.22989495260056367, "grad_norm": 0.5311617255210876, "learning_rate": 9.141891585672748e-05, "loss": 3.1206, "step": 2804 }, { "epoch": 0.23005892902895209, "grad_norm": 0.5451520681381226, "learning_rate": 9.140372185930172e-05, "loss": 3.1293, "step": 2806 }, { "epoch": 0.2302229054573405, "grad_norm": 0.5763382315635681, "learning_rate": 9.138851568720886e-05, "loss": 3.1129, "step": 2808 }, { "epoch": 0.23038688188572892, "grad_norm": 0.5894972681999207, "learning_rate": 9.137329734492026e-05, "loss": 3.0796, "step": 2810 }, { "epoch": 0.23055085831411734, "grad_norm": 0.538650631904602, "learning_rate": 9.135806683691082e-05, "loss": 3.09, "step": 2812 }, { "epoch": 0.23071483474250576, "grad_norm": 0.5035321116447449, "learning_rate": 9.134282416765905e-05, "loss": 3.1168, "step": 2814 }, { "epoch": 0.23087881117089418, "grad_norm": 0.5038688778877258, "learning_rate": 9.132756934164699e-05, "loss": 3.1212, "step": 2816 }, { "epoch": 0.2310427875992826, "grad_norm": 0.53536057472229, "learning_rate": 9.131230236336032e-05, "loss": 3.118, "step": 2818 }, { "epoch": 0.231206764027671, "grad_norm": 0.5447813272476196, "learning_rate": 9.129702323728824e-05, "loss": 3.0866, "step": 2820 }, { "epoch": 0.23137074045605943, "grad_norm": 0.49734705686569214, "learning_rate": 9.128173196792355e-05, "loss": 3.1489, "step": 2822 }, { "epoch": 0.23153471688444785, "grad_norm": 0.5162733793258667, "learning_rate": 9.12664285597626e-05, "loss": 3.166, "step": 2824 }, { "epoch": 0.23169869331283627, "grad_norm": 0.47742417454719543, "learning_rate": 9.125111301730534e-05, "loss": 3.0757, "step": 2826 }, { "epoch": 0.2318626697412247, "grad_norm": 0.5511021018028259, "learning_rate": 9.123578534505525e-05, "loss": 3.1382, "step": 2828 }, { "epoch": 0.2320266461696131, "grad_norm": 0.6152271628379822, "learning_rate": 9.122044554751942e-05, "loss": 3.1326, "step": 2830 }, { "epoch": 0.23219062259800155, "grad_norm": 0.576244592666626, "learning_rate": 9.120509362920846e-05, "loss": 3.1151, "step": 2832 }, { "epoch": 0.23235459902638997, "grad_norm": 0.6472841501235962, "learning_rate": 9.118972959463656e-05, "loss": 3.1018, "step": 2834 }, { "epoch": 0.2325185754547784, "grad_norm": 0.5974353551864624, "learning_rate": 9.11743534483215e-05, "loss": 3.1377, "step": 2836 }, { "epoch": 0.2326825518831668, "grad_norm": 0.5625829696655273, "learning_rate": 9.115896519478458e-05, "loss": 3.1366, "step": 2838 }, { "epoch": 0.23284652831155522, "grad_norm": 0.6244992613792419, "learning_rate": 9.11435648385507e-05, "loss": 3.1386, "step": 2840 }, { "epoch": 0.23301050473994364, "grad_norm": 0.6011447310447693, "learning_rate": 9.11281523841483e-05, "loss": 3.1042, "step": 2842 }, { "epoch": 0.23317448116833206, "grad_norm": 0.6402640342712402, "learning_rate": 9.111272783610934e-05, "loss": 3.1381, "step": 2844 }, { "epoch": 0.23333845759672048, "grad_norm": 0.689268171787262, "learning_rate": 9.109729119896941e-05, "loss": 3.152, "step": 2846 }, { "epoch": 0.2335024340251089, "grad_norm": 0.6104257702827454, "learning_rate": 9.108184247726759e-05, "loss": 3.0882, "step": 2848 }, { "epoch": 0.23366641045349731, "grad_norm": 0.490530401468277, "learning_rate": 9.106638167554657e-05, "loss": 3.1375, "step": 2850 }, { "epoch": 0.23383038688188573, "grad_norm": 0.5417265892028809, "learning_rate": 9.105090879835254e-05, "loss": 3.0875, "step": 2852 }, { "epoch": 0.23399436331027415, "grad_norm": 0.5406416654586792, "learning_rate": 9.103542385023526e-05, "loss": 3.1689, "step": 2854 }, { "epoch": 0.23415833973866257, "grad_norm": 0.5073980093002319, "learning_rate": 9.101992683574805e-05, "loss": 3.1425, "step": 2856 }, { "epoch": 0.234322316167051, "grad_norm": 0.5920371413230896, "learning_rate": 9.100441775944779e-05, "loss": 3.1296, "step": 2858 }, { "epoch": 0.2344862925954394, "grad_norm": 0.4810742735862732, "learning_rate": 9.098889662589485e-05, "loss": 3.0661, "step": 2860 }, { "epoch": 0.23465026902382782, "grad_norm": 0.5148147344589233, "learning_rate": 9.097336343965321e-05, "loss": 3.0586, "step": 2862 }, { "epoch": 0.23481424545221624, "grad_norm": 0.5372908115386963, "learning_rate": 9.095781820529036e-05, "loss": 3.0886, "step": 2864 }, { "epoch": 0.23497822188060466, "grad_norm": 0.5466518402099609, "learning_rate": 9.094226092737734e-05, "loss": 3.1166, "step": 2866 }, { "epoch": 0.23514219830899308, "grad_norm": 0.6220472455024719, "learning_rate": 9.092669161048873e-05, "loss": 3.0579, "step": 2868 }, { "epoch": 0.2353061747373815, "grad_norm": 0.5313682556152344, "learning_rate": 9.091111025920266e-05, "loss": 3.0914, "step": 2870 }, { "epoch": 0.23547015116576991, "grad_norm": 0.5322121381759644, "learning_rate": 9.089551687810076e-05, "loss": 3.1197, "step": 2872 }, { "epoch": 0.23563412759415833, "grad_norm": 0.5401471257209778, "learning_rate": 9.087991147176827e-05, "loss": 3.0361, "step": 2874 }, { "epoch": 0.23579810402254675, "grad_norm": 0.5407954454421997, "learning_rate": 9.086429404479389e-05, "loss": 3.0943, "step": 2876 }, { "epoch": 0.23596208045093517, "grad_norm": 0.6507935523986816, "learning_rate": 9.084866460176991e-05, "loss": 3.1444, "step": 2878 }, { "epoch": 0.2361260568793236, "grad_norm": 0.621780276298523, "learning_rate": 9.08330231472921e-05, "loss": 3.1094, "step": 2880 }, { "epoch": 0.236290033307712, "grad_norm": 0.560219943523407, "learning_rate": 9.081736968595982e-05, "loss": 3.1433, "step": 2882 }, { "epoch": 0.23645400973610042, "grad_norm": 0.5493839979171753, "learning_rate": 9.080170422237593e-05, "loss": 3.0879, "step": 2884 }, { "epoch": 0.23661798616448884, "grad_norm": 0.4997730851173401, "learning_rate": 9.07860267611468e-05, "loss": 3.0705, "step": 2886 }, { "epoch": 0.2367819625928773, "grad_norm": 0.5000733137130737, "learning_rate": 9.077033730688239e-05, "loss": 3.0918, "step": 2888 }, { "epoch": 0.2369459390212657, "grad_norm": 0.595166027545929, "learning_rate": 9.075463586419613e-05, "loss": 3.1018, "step": 2890 }, { "epoch": 0.23710991544965412, "grad_norm": 0.5767890214920044, "learning_rate": 9.073892243770497e-05, "loss": 3.0718, "step": 2892 }, { "epoch": 0.23727389187804254, "grad_norm": 0.544654369354248, "learning_rate": 9.072319703202942e-05, "loss": 3.0892, "step": 2894 }, { "epoch": 0.23743786830643096, "grad_norm": 0.6508696675300598, "learning_rate": 9.070745965179353e-05, "loss": 3.1152, "step": 2896 }, { "epoch": 0.23760184473481938, "grad_norm": 0.6514599323272705, "learning_rate": 9.06917103016248e-05, "loss": 3.1539, "step": 2898 }, { "epoch": 0.2377658211632078, "grad_norm": 0.5342041254043579, "learning_rate": 9.06759489861543e-05, "loss": 3.0977, "step": 2900 }, { "epoch": 0.23792979759159621, "grad_norm": 0.5804237127304077, "learning_rate": 9.066017571001662e-05, "loss": 3.0767, "step": 2902 }, { "epoch": 0.23809377401998463, "grad_norm": 0.5167868137359619, "learning_rate": 9.064439047784982e-05, "loss": 3.0855, "step": 2904 }, { "epoch": 0.23825775044837305, "grad_norm": 0.5301772356033325, "learning_rate": 9.062859329429556e-05, "loss": 3.1147, "step": 2906 }, { "epoch": 0.23842172687676147, "grad_norm": 0.5427364110946655, "learning_rate": 9.061278416399895e-05, "loss": 3.1045, "step": 2908 }, { "epoch": 0.2385857033051499, "grad_norm": 0.5556970834732056, "learning_rate": 9.059696309160859e-05, "loss": 3.1235, "step": 2910 }, { "epoch": 0.2387496797335383, "grad_norm": 0.5361714363098145, "learning_rate": 9.058113008177667e-05, "loss": 3.1411, "step": 2912 }, { "epoch": 0.23891365616192672, "grad_norm": 0.7139540910720825, "learning_rate": 9.056528513915882e-05, "loss": 3.1739, "step": 2914 }, { "epoch": 0.23907763259031514, "grad_norm": 0.6499415040016174, "learning_rate": 9.054942826841427e-05, "loss": 3.0815, "step": 2916 }, { "epoch": 0.23924160901870356, "grad_norm": 0.6187708973884583, "learning_rate": 9.05335594742056e-05, "loss": 3.1197, "step": 2918 }, { "epoch": 0.23940558544709198, "grad_norm": 0.5267696976661682, "learning_rate": 9.051767876119906e-05, "loss": 3.1279, "step": 2920 }, { "epoch": 0.2395695618754804, "grad_norm": 0.5782443284988403, "learning_rate": 9.050178613406432e-05, "loss": 3.1206, "step": 2922 }, { "epoch": 0.23973353830386882, "grad_norm": 0.5910431742668152, "learning_rate": 9.048588159747457e-05, "loss": 3.11, "step": 2924 }, { "epoch": 0.23989751473225723, "grad_norm": 0.5470311641693115, "learning_rate": 9.046996515610649e-05, "loss": 3.0588, "step": 2926 }, { "epoch": 0.24006149116064565, "grad_norm": 0.6402561068534851, "learning_rate": 9.045403681464028e-05, "loss": 3.131, "step": 2928 }, { "epoch": 0.24022546758903407, "grad_norm": 0.5332674980163574, "learning_rate": 9.043809657775964e-05, "loss": 3.1398, "step": 2930 }, { "epoch": 0.2403894440174225, "grad_norm": 0.5881835222244263, "learning_rate": 9.042214445015176e-05, "loss": 3.1354, "step": 2932 }, { "epoch": 0.2405534204458109, "grad_norm": 0.6585120558738708, "learning_rate": 9.04061804365073e-05, "loss": 3.1273, "step": 2934 }, { "epoch": 0.24071739687419932, "grad_norm": 0.6403549313545227, "learning_rate": 9.039020454152047e-05, "loss": 3.1051, "step": 2936 }, { "epoch": 0.24088137330258774, "grad_norm": 0.60472571849823, "learning_rate": 9.037421676988893e-05, "loss": 3.1076, "step": 2938 }, { "epoch": 0.24104534973097616, "grad_norm": 0.5805239081382751, "learning_rate": 9.035821712631385e-05, "loss": 3.1201, "step": 2940 }, { "epoch": 0.24120932615936458, "grad_norm": 0.4733094274997711, "learning_rate": 9.034220561549988e-05, "loss": 3.127, "step": 2942 }, { "epoch": 0.241373302587753, "grad_norm": 0.6691949963569641, "learning_rate": 9.03261822421552e-05, "loss": 3.1115, "step": 2944 }, { "epoch": 0.24153727901614144, "grad_norm": 0.6166451573371887, "learning_rate": 9.031014701099139e-05, "loss": 3.1177, "step": 2946 }, { "epoch": 0.24170125544452986, "grad_norm": 0.7083244323730469, "learning_rate": 9.029409992672359e-05, "loss": 3.095, "step": 2948 }, { "epoch": 0.24186523187291828, "grad_norm": 0.6332703232765198, "learning_rate": 9.027804099407045e-05, "loss": 3.1122, "step": 2950 }, { "epoch": 0.2420292083013067, "grad_norm": 0.6068108081817627, "learning_rate": 9.026197021775402e-05, "loss": 3.0873, "step": 2952 }, { "epoch": 0.24219318472969512, "grad_norm": 0.5226258039474487, "learning_rate": 9.024588760249988e-05, "loss": 3.0131, "step": 2954 }, { "epoch": 0.24235716115808353, "grad_norm": 0.5775647163391113, "learning_rate": 9.02297931530371e-05, "loss": 3.115, "step": 2956 }, { "epoch": 0.24252113758647195, "grad_norm": 0.5021248459815979, "learning_rate": 9.021368687409819e-05, "loss": 3.0398, "step": 2958 }, { "epoch": 0.24268511401486037, "grad_norm": 0.5804054737091064, "learning_rate": 9.019756877041918e-05, "loss": 3.1158, "step": 2960 }, { "epoch": 0.2428490904432488, "grad_norm": 0.5360262989997864, "learning_rate": 9.018143884673957e-05, "loss": 3.1476, "step": 2962 }, { "epoch": 0.2430130668716372, "grad_norm": 0.5107494592666626, "learning_rate": 9.016529710780231e-05, "loss": 3.0919, "step": 2964 }, { "epoch": 0.24317704330002562, "grad_norm": 0.5997065305709839, "learning_rate": 9.014914355835384e-05, "loss": 3.08, "step": 2966 }, { "epoch": 0.24334101972841404, "grad_norm": 0.5437501668930054, "learning_rate": 9.013297820314408e-05, "loss": 3.1194, "step": 2968 }, { "epoch": 0.24350499615680246, "grad_norm": 0.5322654843330383, "learning_rate": 9.01168010469264e-05, "loss": 3.0669, "step": 2970 }, { "epoch": 0.24366897258519088, "grad_norm": 0.48639851808547974, "learning_rate": 9.010061209445769e-05, "loss": 3.1127, "step": 2972 }, { "epoch": 0.2438329490135793, "grad_norm": 0.4471394717693329, "learning_rate": 9.008441135049823e-05, "loss": 3.1262, "step": 2974 }, { "epoch": 0.24399692544196772, "grad_norm": 0.4837088882923126, "learning_rate": 9.006819881981184e-05, "loss": 3.0944, "step": 2976 }, { "epoch": 0.24416090187035613, "grad_norm": 0.4812915027141571, "learning_rate": 9.005197450716577e-05, "loss": 3.0651, "step": 2978 }, { "epoch": 0.24432487829874455, "grad_norm": 0.49380356073379517, "learning_rate": 9.003573841733075e-05, "loss": 3.1066, "step": 2980 }, { "epoch": 0.24448885472713297, "grad_norm": 0.49508631229400635, "learning_rate": 9.001949055508094e-05, "loss": 3.0422, "step": 2982 }, { "epoch": 0.2446528311555214, "grad_norm": 0.5182914137840271, "learning_rate": 9.0003230925194e-05, "loss": 3.1087, "step": 2984 }, { "epoch": 0.2448168075839098, "grad_norm": 0.5734208226203918, "learning_rate": 8.998695953245103e-05, "loss": 3.0798, "step": 2986 }, { "epoch": 0.24498078401229822, "grad_norm": 0.5737510919570923, "learning_rate": 8.99706763816366e-05, "loss": 3.1035, "step": 2988 }, { "epoch": 0.24514476044068664, "grad_norm": 0.5363655686378479, "learning_rate": 8.995438147753874e-05, "loss": 3.1577, "step": 2990 }, { "epoch": 0.24530873686907506, "grad_norm": 0.5472906231880188, "learning_rate": 8.993807482494892e-05, "loss": 3.1101, "step": 2992 }, { "epoch": 0.24547271329746348, "grad_norm": 0.5381254553794861, "learning_rate": 8.992175642866208e-05, "loss": 3.0842, "step": 2994 }, { "epoch": 0.2456366897258519, "grad_norm": 0.5339920520782471, "learning_rate": 8.990542629347658e-05, "loss": 3.0531, "step": 2996 }, { "epoch": 0.24580066615424032, "grad_norm": 0.5434836149215698, "learning_rate": 8.988908442419429e-05, "loss": 3.0259, "step": 2998 }, { "epoch": 0.24596464258262873, "grad_norm": 0.5441123247146606, "learning_rate": 8.987273082562048e-05, "loss": 3.0843, "step": 3000 }, { "epoch": 0.24612861901101718, "grad_norm": 0.5503421425819397, "learning_rate": 8.98563655025639e-05, "loss": 3.09, "step": 3002 }, { "epoch": 0.2462925954394056, "grad_norm": 0.521301805973053, "learning_rate": 8.983998845983672e-05, "loss": 3.0932, "step": 3004 }, { "epoch": 0.24645657186779402, "grad_norm": 0.47878655791282654, "learning_rate": 8.982359970225458e-05, "loss": 3.0644, "step": 3006 }, { "epoch": 0.24662054829618243, "grad_norm": 0.56011962890625, "learning_rate": 8.980719923463654e-05, "loss": 3.0469, "step": 3008 }, { "epoch": 0.24678452472457085, "grad_norm": 0.5374152064323425, "learning_rate": 8.979078706180515e-05, "loss": 3.0857, "step": 3010 }, { "epoch": 0.24694850115295927, "grad_norm": 0.524940013885498, "learning_rate": 8.977436318858635e-05, "loss": 3.046, "step": 3012 }, { "epoch": 0.2471124775813477, "grad_norm": 0.4905287027359009, "learning_rate": 8.975792761980954e-05, "loss": 3.0903, "step": 3014 }, { "epoch": 0.2472764540097361, "grad_norm": 0.5330361127853394, "learning_rate": 8.974148036030758e-05, "loss": 3.0994, "step": 3016 }, { "epoch": 0.24744043043812453, "grad_norm": 0.5203661322593689, "learning_rate": 8.972502141491673e-05, "loss": 3.0716, "step": 3018 }, { "epoch": 0.24760440686651294, "grad_norm": 0.5972068905830383, "learning_rate": 8.970855078847669e-05, "loss": 3.1228, "step": 3020 }, { "epoch": 0.24776838329490136, "grad_norm": 0.5131882429122925, "learning_rate": 8.969206848583062e-05, "loss": 3.0532, "step": 3022 }, { "epoch": 0.24793235972328978, "grad_norm": 0.5639394521713257, "learning_rate": 8.967557451182514e-05, "loss": 3.0587, "step": 3024 }, { "epoch": 0.2480963361516782, "grad_norm": 0.5577993392944336, "learning_rate": 8.965906887131022e-05, "loss": 3.0912, "step": 3026 }, { "epoch": 0.24826031258006662, "grad_norm": 0.5181630849838257, "learning_rate": 8.964255156913933e-05, "loss": 3.0889, "step": 3028 }, { "epoch": 0.24842428900845503, "grad_norm": 0.5294365286827087, "learning_rate": 8.96260226101693e-05, "loss": 3.0869, "step": 3030 }, { "epoch": 0.24858826543684345, "grad_norm": 0.513014554977417, "learning_rate": 8.960948199926048e-05, "loss": 3.1187, "step": 3032 }, { "epoch": 0.24875224186523187, "grad_norm": 0.47102218866348267, "learning_rate": 8.95929297412766e-05, "loss": 3.0829, "step": 3034 }, { "epoch": 0.2489162182936203, "grad_norm": 0.47533565759658813, "learning_rate": 8.957636584108476e-05, "loss": 3.0324, "step": 3036 }, { "epoch": 0.2490801947220087, "grad_norm": 0.5862759351730347, "learning_rate": 8.955979030355559e-05, "loss": 3.1245, "step": 3038 }, { "epoch": 0.24924417115039713, "grad_norm": 0.5523611307144165, "learning_rate": 8.954320313356306e-05, "loss": 3.0173, "step": 3040 }, { "epoch": 0.24940814757878554, "grad_norm": 0.5045341849327087, "learning_rate": 8.952660433598459e-05, "loss": 3.0735, "step": 3042 }, { "epoch": 0.24957212400717396, "grad_norm": 0.5677223205566406, "learning_rate": 8.950999391570103e-05, "loss": 3.0802, "step": 3044 }, { "epoch": 0.24973610043556238, "grad_norm": 0.5546299815177917, "learning_rate": 8.949337187759663e-05, "loss": 3.0347, "step": 3046 }, { "epoch": 0.2499000768639508, "grad_norm": 0.5589927434921265, "learning_rate": 8.947673822655906e-05, "loss": 3.1115, "step": 3048 }, { "epoch": 0.2500640532923392, "grad_norm": 0.5995765328407288, "learning_rate": 8.946009296747942e-05, "loss": 3.0568, "step": 3050 }, { "epoch": 0.25022802972072766, "grad_norm": 0.5527830719947815, "learning_rate": 8.944343610525216e-05, "loss": 3.0868, "step": 3052 }, { "epoch": 0.25039200614911605, "grad_norm": 0.4991224408149719, "learning_rate": 8.942676764477524e-05, "loss": 3.0589, "step": 3054 }, { "epoch": 0.2505559825775045, "grad_norm": 0.5033073425292969, "learning_rate": 8.941008759094998e-05, "loss": 3.0778, "step": 3056 }, { "epoch": 0.2507199590058929, "grad_norm": 0.49312669038772583, "learning_rate": 8.939339594868109e-05, "loss": 3.0657, "step": 3058 }, { "epoch": 0.25088393543428134, "grad_norm": 0.49565747380256653, "learning_rate": 8.937669272287672e-05, "loss": 3.0625, "step": 3060 }, { "epoch": 0.2510479118626697, "grad_norm": 0.5610074996948242, "learning_rate": 8.935997791844842e-05, "loss": 3.0784, "step": 3062 }, { "epoch": 0.25121188829105817, "grad_norm": 0.5718916058540344, "learning_rate": 8.93432515403111e-05, "loss": 3.0565, "step": 3064 }, { "epoch": 0.25137586471944656, "grad_norm": 0.5423455238342285, "learning_rate": 8.932651359338316e-05, "loss": 3.0458, "step": 3066 }, { "epoch": 0.251539841147835, "grad_norm": 0.5095962285995483, "learning_rate": 8.930976408258633e-05, "loss": 3.0652, "step": 3068 }, { "epoch": 0.2517038175762234, "grad_norm": 0.522039532661438, "learning_rate": 8.929300301284578e-05, "loss": 3.0416, "step": 3070 }, { "epoch": 0.25186779400461184, "grad_norm": 0.5137038230895996, "learning_rate": 8.927623038909004e-05, "loss": 3.0422, "step": 3072 }, { "epoch": 0.25203177043300024, "grad_norm": 0.4940985441207886, "learning_rate": 8.925944621625109e-05, "loss": 3.0576, "step": 3074 }, { "epoch": 0.2521957468613887, "grad_norm": 0.492217093706131, "learning_rate": 8.924265049926423e-05, "loss": 3.0481, "step": 3076 }, { "epoch": 0.25235972328977707, "grad_norm": 0.5192411541938782, "learning_rate": 8.922584324306827e-05, "loss": 3.087, "step": 3078 }, { "epoch": 0.2525236997181655, "grad_norm": 0.47307392954826355, "learning_rate": 8.920902445260528e-05, "loss": 3.0104, "step": 3080 }, { "epoch": 0.2526876761465539, "grad_norm": 0.4889049828052521, "learning_rate": 8.919219413282083e-05, "loss": 3.0765, "step": 3082 }, { "epoch": 0.25285165257494235, "grad_norm": 0.4941463768482208, "learning_rate": 8.917535228866379e-05, "loss": 3.0438, "step": 3084 }, { "epoch": 0.25301562900333074, "grad_norm": 0.49077731370925903, "learning_rate": 8.915849892508652e-05, "loss": 3.0448, "step": 3086 }, { "epoch": 0.2531796054317192, "grad_norm": 0.5119606256484985, "learning_rate": 8.914163404704466e-05, "loss": 3.0407, "step": 3088 }, { "epoch": 0.25334358186010764, "grad_norm": 0.5803630352020264, "learning_rate": 8.912475765949733e-05, "loss": 3.072, "step": 3090 }, { "epoch": 0.253507558288496, "grad_norm": 0.5256679654121399, "learning_rate": 8.910786976740697e-05, "loss": 3.1345, "step": 3092 }, { "epoch": 0.2536715347168845, "grad_norm": 0.5043202042579651, "learning_rate": 8.909097037573941e-05, "loss": 3.0407, "step": 3094 }, { "epoch": 0.25383551114527286, "grad_norm": 0.536882221698761, "learning_rate": 8.907405948946393e-05, "loss": 3.0808, "step": 3096 }, { "epoch": 0.2539994875736613, "grad_norm": 0.5040213465690613, "learning_rate": 8.905713711355308e-05, "loss": 3.0875, "step": 3098 }, { "epoch": 0.2541634640020497, "grad_norm": 0.5036368370056152, "learning_rate": 8.904020325298286e-05, "loss": 3.0399, "step": 3100 }, { "epoch": 0.25432744043043815, "grad_norm": 0.5161576271057129, "learning_rate": 8.902325791273265e-05, "loss": 3.0826, "step": 3102 }, { "epoch": 0.25449141685882654, "grad_norm": 0.49616822600364685, "learning_rate": 8.900630109778517e-05, "loss": 3.0633, "step": 3104 }, { "epoch": 0.254655393287215, "grad_norm": 0.4793374240398407, "learning_rate": 8.898933281312653e-05, "loss": 3.081, "step": 3106 }, { "epoch": 0.25481936971560337, "grad_norm": 0.46786659955978394, "learning_rate": 8.897235306374625e-05, "loss": 3.0785, "step": 3108 }, { "epoch": 0.2549833461439918, "grad_norm": 0.45357832312583923, "learning_rate": 8.895536185463713e-05, "loss": 3.0654, "step": 3110 }, { "epoch": 0.2551473225723802, "grad_norm": 0.5247970223426819, "learning_rate": 8.893835919079543e-05, "loss": 3.1274, "step": 3112 }, { "epoch": 0.25531129900076865, "grad_norm": 0.5221189260482788, "learning_rate": 8.892134507722074e-05, "loss": 3.0446, "step": 3114 }, { "epoch": 0.25547527542915704, "grad_norm": 0.518396258354187, "learning_rate": 8.8904319518916e-05, "loss": 3.1165, "step": 3116 }, { "epoch": 0.2556392518575455, "grad_norm": 0.4879421591758728, "learning_rate": 8.888728252088758e-05, "loss": 3.0649, "step": 3118 }, { "epoch": 0.2558032282859339, "grad_norm": 0.47542670369148254, "learning_rate": 8.887023408814512e-05, "loss": 3.0965, "step": 3120 }, { "epoch": 0.2559672047143223, "grad_norm": 0.5255916714668274, "learning_rate": 8.88531742257017e-05, "loss": 3.0032, "step": 3122 }, { "epoch": 0.2561311811427107, "grad_norm": 0.5671265125274658, "learning_rate": 8.883610293857371e-05, "loss": 3.0821, "step": 3124 }, { "epoch": 0.25629515757109916, "grad_norm": 0.5040590167045593, "learning_rate": 8.881902023178094e-05, "loss": 3.0581, "step": 3126 }, { "epoch": 0.25645913399948755, "grad_norm": 0.5780110359191895, "learning_rate": 8.880192611034652e-05, "loss": 3.0678, "step": 3128 }, { "epoch": 0.256623110427876, "grad_norm": 0.5004504323005676, "learning_rate": 8.878482057929693e-05, "loss": 3.0434, "step": 3130 }, { "epoch": 0.2567870868562644, "grad_norm": 0.5049440860748291, "learning_rate": 8.876770364366201e-05, "loss": 3.0637, "step": 3132 }, { "epoch": 0.25695106328465284, "grad_norm": 0.51917964220047, "learning_rate": 8.875057530847497e-05, "loss": 3.0346, "step": 3134 }, { "epoch": 0.2571150397130412, "grad_norm": 0.5143879055976868, "learning_rate": 8.873343557877234e-05, "loss": 3.016, "step": 3136 }, { "epoch": 0.2572790161414297, "grad_norm": 0.4929054081439972, "learning_rate": 8.871628445959402e-05, "loss": 3.0746, "step": 3138 }, { "epoch": 0.25744299256981806, "grad_norm": 0.4992254972457886, "learning_rate": 8.869912195598326e-05, "loss": 3.0855, "step": 3140 }, { "epoch": 0.2576069689982065, "grad_norm": 0.5007113814353943, "learning_rate": 8.868194807298664e-05, "loss": 2.9942, "step": 3142 }, { "epoch": 0.2577709454265949, "grad_norm": 0.5016566514968872, "learning_rate": 8.866476281565413e-05, "loss": 3.0523, "step": 3144 }, { "epoch": 0.25793492185498335, "grad_norm": 0.5850445032119751, "learning_rate": 8.864756618903898e-05, "loss": 3.0497, "step": 3146 }, { "epoch": 0.2580988982833718, "grad_norm": 0.5336945056915283, "learning_rate": 8.863035819819784e-05, "loss": 3.0543, "step": 3148 }, { "epoch": 0.2582628747117602, "grad_norm": 0.5204024314880371, "learning_rate": 8.861313884819066e-05, "loss": 3.0131, "step": 3150 }, { "epoch": 0.25842685114014863, "grad_norm": 0.49752482771873474, "learning_rate": 8.859590814408078e-05, "loss": 3.0386, "step": 3152 }, { "epoch": 0.258590827568537, "grad_norm": 0.5874549746513367, "learning_rate": 8.857866609093484e-05, "loss": 3.1003, "step": 3154 }, { "epoch": 0.25875480399692546, "grad_norm": 0.5115360617637634, "learning_rate": 8.856141269382281e-05, "loss": 2.9792, "step": 3156 }, { "epoch": 0.25891878042531385, "grad_norm": 0.5269978046417236, "learning_rate": 8.8544147957818e-05, "loss": 3.0692, "step": 3158 }, { "epoch": 0.2590827568537023, "grad_norm": 0.5046514868736267, "learning_rate": 8.852687188799709e-05, "loss": 3.0371, "step": 3160 }, { "epoch": 0.2592467332820907, "grad_norm": 0.570195198059082, "learning_rate": 8.850958448944007e-05, "loss": 3.0922, "step": 3162 }, { "epoch": 0.25941070971047914, "grad_norm": 0.5811053514480591, "learning_rate": 8.849228576723024e-05, "loss": 3.0337, "step": 3164 }, { "epoch": 0.2595746861388675, "grad_norm": 0.4887376129627228, "learning_rate": 8.847497572645424e-05, "loss": 3.0332, "step": 3166 }, { "epoch": 0.259738662567256, "grad_norm": 0.5527136325836182, "learning_rate": 8.845765437220209e-05, "loss": 3.029, "step": 3168 }, { "epoch": 0.25990263899564436, "grad_norm": 0.5504634380340576, "learning_rate": 8.844032170956707e-05, "loss": 3.0602, "step": 3170 }, { "epoch": 0.2600666154240328, "grad_norm": 0.48267602920532227, "learning_rate": 8.842297774364579e-05, "loss": 3.0176, "step": 3172 }, { "epoch": 0.2602305918524212, "grad_norm": 0.5267589092254639, "learning_rate": 8.840562247953822e-05, "loss": 3.082, "step": 3174 }, { "epoch": 0.26039456828080965, "grad_norm": 0.48394930362701416, "learning_rate": 8.838825592234763e-05, "loss": 3.0773, "step": 3176 }, { "epoch": 0.26055854470919804, "grad_norm": 0.5486636757850647, "learning_rate": 8.837087807718062e-05, "loss": 3.0657, "step": 3178 }, { "epoch": 0.2607225211375865, "grad_norm": 0.6029240489006042, "learning_rate": 8.835348894914712e-05, "loss": 3.0705, "step": 3180 }, { "epoch": 0.2608864975659749, "grad_norm": 0.5119585394859314, "learning_rate": 8.833608854336032e-05, "loss": 3.0849, "step": 3182 }, { "epoch": 0.2610504739943633, "grad_norm": 0.5734114050865173, "learning_rate": 8.831867686493682e-05, "loss": 3.093, "step": 3184 }, { "epoch": 0.2612144504227517, "grad_norm": 0.5184375047683716, "learning_rate": 8.830125391899645e-05, "loss": 3.0587, "step": 3186 }, { "epoch": 0.26137842685114016, "grad_norm": 0.5614577531814575, "learning_rate": 8.828381971066238e-05, "loss": 3.022, "step": 3188 }, { "epoch": 0.26154240327952855, "grad_norm": 0.4999874532222748, "learning_rate": 8.82663742450611e-05, "loss": 3.0534, "step": 3190 }, { "epoch": 0.261706379707917, "grad_norm": 0.5082796812057495, "learning_rate": 8.824891752732246e-05, "loss": 3.0002, "step": 3192 }, { "epoch": 0.2618703561363054, "grad_norm": 0.4463191628456116, "learning_rate": 8.82314495625795e-05, "loss": 3.1116, "step": 3194 }, { "epoch": 0.26203433256469383, "grad_norm": 0.49782219529151917, "learning_rate": 8.821397035596865e-05, "loss": 3.0144, "step": 3196 }, { "epoch": 0.2621983089930822, "grad_norm": 0.4964871406555176, "learning_rate": 8.819647991262965e-05, "loss": 3.0592, "step": 3198 }, { "epoch": 0.26236228542147066, "grad_norm": 0.5055323839187622, "learning_rate": 8.817897823770552e-05, "loss": 3.0538, "step": 3200 }, { "epoch": 0.2625262618498591, "grad_norm": 0.5108357071876526, "learning_rate": 8.816146533634258e-05, "loss": 3.0168, "step": 3202 }, { "epoch": 0.2626902382782475, "grad_norm": 0.5637660026550293, "learning_rate": 8.814394121369044e-05, "loss": 3.0205, "step": 3204 }, { "epoch": 0.26285421470663595, "grad_norm": 0.5199828147888184, "learning_rate": 8.812640587490206e-05, "loss": 3.0393, "step": 3206 }, { "epoch": 0.26301819113502434, "grad_norm": 0.518085241317749, "learning_rate": 8.810885932513364e-05, "loss": 3.0558, "step": 3208 }, { "epoch": 0.2631821675634128, "grad_norm": 0.5022614598274231, "learning_rate": 8.809130156954472e-05, "loss": 3.0701, "step": 3210 }, { "epoch": 0.2633461439918012, "grad_norm": 0.5334145426750183, "learning_rate": 8.807373261329809e-05, "loss": 3.0292, "step": 3212 }, { "epoch": 0.2635101204201896, "grad_norm": 0.5729058980941772, "learning_rate": 8.80561524615599e-05, "loss": 3.0583, "step": 3214 }, { "epoch": 0.263674096848578, "grad_norm": 0.5852642059326172, "learning_rate": 8.803856111949952e-05, "loss": 3.0467, "step": 3216 }, { "epoch": 0.26383807327696646, "grad_norm": 0.5108850598335266, "learning_rate": 8.802095859228965e-05, "loss": 2.9989, "step": 3218 }, { "epoch": 0.26400204970535485, "grad_norm": 0.532211184501648, "learning_rate": 8.800334488510629e-05, "loss": 3.0702, "step": 3220 }, { "epoch": 0.2641660261337433, "grad_norm": 0.5329665541648865, "learning_rate": 8.798572000312868e-05, "loss": 3.0789, "step": 3222 }, { "epoch": 0.2643300025621317, "grad_norm": 0.5084935426712036, "learning_rate": 8.796808395153939e-05, "loss": 2.9789, "step": 3224 }, { "epoch": 0.26449397899052013, "grad_norm": 0.6284180879592896, "learning_rate": 8.795043673552426e-05, "loss": 3.0542, "step": 3226 }, { "epoch": 0.2646579554189085, "grad_norm": 0.491621732711792, "learning_rate": 8.793277836027243e-05, "loss": 3.0203, "step": 3228 }, { "epoch": 0.26482193184729697, "grad_norm": 0.5157349705696106, "learning_rate": 8.791510883097627e-05, "loss": 2.9835, "step": 3230 }, { "epoch": 0.26498590827568536, "grad_norm": 0.5752792954444885, "learning_rate": 8.789742815283147e-05, "loss": 3.0707, "step": 3232 }, { "epoch": 0.2651498847040738, "grad_norm": 0.5733649730682373, "learning_rate": 8.787973633103701e-05, "loss": 2.9905, "step": 3234 }, { "epoch": 0.2653138611324622, "grad_norm": 0.5354986786842346, "learning_rate": 8.786203337079512e-05, "loss": 3.0373, "step": 3236 }, { "epoch": 0.26547783756085064, "grad_norm": 0.590827226638794, "learning_rate": 8.78443192773113e-05, "loss": 3.1053, "step": 3238 }, { "epoch": 0.26564181398923903, "grad_norm": 0.5951526165008545, "learning_rate": 8.782659405579437e-05, "loss": 3.0664, "step": 3240 }, { "epoch": 0.2658057904176275, "grad_norm": 0.5225715637207031, "learning_rate": 8.780885771145635e-05, "loss": 3.0216, "step": 3242 }, { "epoch": 0.26596976684601586, "grad_norm": 0.5698503851890564, "learning_rate": 8.77911102495126e-05, "loss": 3.0631, "step": 3244 }, { "epoch": 0.2661337432744043, "grad_norm": 0.4748436212539673, "learning_rate": 8.777335167518172e-05, "loss": 3.0373, "step": 3246 }, { "epoch": 0.2662977197027927, "grad_norm": 0.5692675709724426, "learning_rate": 8.775558199368556e-05, "loss": 3.0603, "step": 3248 }, { "epoch": 0.26646169613118115, "grad_norm": 0.5429494380950928, "learning_rate": 8.773780121024925e-05, "loss": 3.0812, "step": 3250 }, { "epoch": 0.26662567255956954, "grad_norm": 0.5639028549194336, "learning_rate": 8.77200093301012e-05, "loss": 3.0548, "step": 3252 }, { "epoch": 0.266789648987958, "grad_norm": 0.4472223222255707, "learning_rate": 8.770220635847308e-05, "loss": 3.0257, "step": 3254 }, { "epoch": 0.2669536254163464, "grad_norm": 0.5361818671226501, "learning_rate": 8.76843923005998e-05, "loss": 3.0535, "step": 3256 }, { "epoch": 0.2671176018447348, "grad_norm": 0.49390971660614014, "learning_rate": 8.766656716171952e-05, "loss": 3.0318, "step": 3258 }, { "epoch": 0.26728157827312327, "grad_norm": 0.47352907061576843, "learning_rate": 8.764873094707371e-05, "loss": 3.024, "step": 3260 }, { "epoch": 0.26744555470151166, "grad_norm": 0.5356292128562927, "learning_rate": 8.763088366190709e-05, "loss": 3.0469, "step": 3262 }, { "epoch": 0.2676095311299001, "grad_norm": 0.5446259379386902, "learning_rate": 8.761302531146754e-05, "loss": 3.0701, "step": 3264 }, { "epoch": 0.2677735075582885, "grad_norm": 0.481741338968277, "learning_rate": 8.759515590100633e-05, "loss": 3.0365, "step": 3266 }, { "epoch": 0.26793748398667694, "grad_norm": 0.4720764756202698, "learning_rate": 8.75772754357779e-05, "loss": 3.0117, "step": 3268 }, { "epoch": 0.26810146041506533, "grad_norm": 0.4751954674720764, "learning_rate": 8.755938392103993e-05, "loss": 3.0229, "step": 3270 }, { "epoch": 0.2682654368434538, "grad_norm": 0.5223908424377441, "learning_rate": 8.754148136205343e-05, "loss": 3.0563, "step": 3272 }, { "epoch": 0.26842941327184217, "grad_norm": 0.5289605855941772, "learning_rate": 8.752356776408254e-05, "loss": 3.0188, "step": 3274 }, { "epoch": 0.2685933897002306, "grad_norm": 0.4922078549861908, "learning_rate": 8.750564313239478e-05, "loss": 3.0092, "step": 3276 }, { "epoch": 0.268757366128619, "grad_norm": 0.5029440522193909, "learning_rate": 8.74877074722608e-05, "loss": 3.0981, "step": 3278 }, { "epoch": 0.26892134255700745, "grad_norm": 0.5761165618896484, "learning_rate": 8.746976078895457e-05, "loss": 3.0617, "step": 3280 }, { "epoch": 0.26908531898539584, "grad_norm": 0.5281015634536743, "learning_rate": 8.745180308775325e-05, "loss": 3.0612, "step": 3282 }, { "epoch": 0.2692492954137843, "grad_norm": 0.4676494896411896, "learning_rate": 8.743383437393725e-05, "loss": 3.0523, "step": 3284 }, { "epoch": 0.2694132718421727, "grad_norm": 0.4948801100254059, "learning_rate": 8.741585465279025e-05, "loss": 3.037, "step": 3286 }, { "epoch": 0.2695772482705611, "grad_norm": 0.5158532857894897, "learning_rate": 8.739786392959914e-05, "loss": 3.0052, "step": 3288 }, { "epoch": 0.2697412246989495, "grad_norm": 0.4951574504375458, "learning_rate": 8.737986220965403e-05, "loss": 3.0039, "step": 3290 }, { "epoch": 0.26990520112733796, "grad_norm": 0.4828198254108429, "learning_rate": 8.736184949824832e-05, "loss": 3.046, "step": 3292 }, { "epoch": 0.27006917755572635, "grad_norm": 0.5514748096466064, "learning_rate": 8.734382580067856e-05, "loss": 3.0188, "step": 3294 }, { "epoch": 0.2702331539841148, "grad_norm": 0.5289325714111328, "learning_rate": 8.732579112224464e-05, "loss": 2.9917, "step": 3296 }, { "epoch": 0.2703971304125032, "grad_norm": 0.4985904395580292, "learning_rate": 8.730774546824953e-05, "loss": 3.0484, "step": 3298 }, { "epoch": 0.27056110684089163, "grad_norm": 0.5238404870033264, "learning_rate": 8.728968884399959e-05, "loss": 3.0856, "step": 3300 }, { "epoch": 0.27072508326928, "grad_norm": 0.5596960186958313, "learning_rate": 8.727162125480429e-05, "loss": 3.0124, "step": 3302 }, { "epoch": 0.27088905969766847, "grad_norm": 0.5070179104804993, "learning_rate": 8.725354270597636e-05, "loss": 3.0299, "step": 3304 }, { "epoch": 0.27105303612605686, "grad_norm": 0.4900921583175659, "learning_rate": 8.723545320283178e-05, "loss": 3.0523, "step": 3306 }, { "epoch": 0.2712170125544453, "grad_norm": 0.5997633934020996, "learning_rate": 8.721735275068968e-05, "loss": 3.111, "step": 3308 }, { "epoch": 0.2713809889828337, "grad_norm": 0.6209022998809814, "learning_rate": 8.719924135487249e-05, "loss": 3.0585, "step": 3310 }, { "epoch": 0.27154496541122214, "grad_norm": 0.5038576722145081, "learning_rate": 8.718111902070583e-05, "loss": 3.0472, "step": 3312 }, { "epoch": 0.27170894183961053, "grad_norm": 0.5371909141540527, "learning_rate": 8.716298575351852e-05, "loss": 2.9892, "step": 3314 }, { "epoch": 0.271872918267999, "grad_norm": 0.5439016222953796, "learning_rate": 8.714484155864257e-05, "loss": 3.025, "step": 3316 }, { "epoch": 0.2720368946963874, "grad_norm": 0.6247979998588562, "learning_rate": 8.71266864414133e-05, "loss": 3.0627, "step": 3318 }, { "epoch": 0.2722008711247758, "grad_norm": 0.6789233684539795, "learning_rate": 8.710852040716915e-05, "loss": 3.0713, "step": 3320 }, { "epoch": 0.27236484755316426, "grad_norm": 0.6147254705429077, "learning_rate": 8.709034346125178e-05, "loss": 3.0732, "step": 3322 }, { "epoch": 0.27252882398155265, "grad_norm": 0.5249021649360657, "learning_rate": 8.707215560900612e-05, "loss": 3.0765, "step": 3324 }, { "epoch": 0.2726928004099411, "grad_norm": 0.5897969603538513, "learning_rate": 8.705395685578022e-05, "loss": 3.0331, "step": 3326 }, { "epoch": 0.2728567768383295, "grad_norm": 0.5916758179664612, "learning_rate": 8.703574720692541e-05, "loss": 3.0078, "step": 3328 }, { "epoch": 0.27302075326671793, "grad_norm": 0.48185595870018005, "learning_rate": 8.701752666779619e-05, "loss": 3.0312, "step": 3330 }, { "epoch": 0.2731847296951063, "grad_norm": 0.5248916745185852, "learning_rate": 8.699929524375025e-05, "loss": 3.0062, "step": 3332 }, { "epoch": 0.27334870612349477, "grad_norm": 0.5910229086875916, "learning_rate": 8.698105294014853e-05, "loss": 3.0185, "step": 3334 }, { "epoch": 0.27351268255188316, "grad_norm": 0.5539065003395081, "learning_rate": 8.696279976235512e-05, "loss": 3.0376, "step": 3336 }, { "epoch": 0.2736766589802716, "grad_norm": 0.5597767233848572, "learning_rate": 8.694453571573731e-05, "loss": 2.9856, "step": 3338 }, { "epoch": 0.27384063540866, "grad_norm": 0.4913848638534546, "learning_rate": 8.692626080566561e-05, "loss": 3.0781, "step": 3340 }, { "epoch": 0.27400461183704844, "grad_norm": 0.5226843357086182, "learning_rate": 8.690797503751373e-05, "loss": 3.05, "step": 3342 }, { "epoch": 0.27416858826543683, "grad_norm": 0.5141190886497498, "learning_rate": 8.688967841665853e-05, "loss": 3.0245, "step": 3344 }, { "epoch": 0.2743325646938253, "grad_norm": 0.5239917635917664, "learning_rate": 8.68713709484801e-05, "loss": 3.0523, "step": 3346 }, { "epoch": 0.27449654112221367, "grad_norm": 0.533658504486084, "learning_rate": 8.685305263836172e-05, "loss": 3.0296, "step": 3348 }, { "epoch": 0.2746605175506021, "grad_norm": 0.5123451948165894, "learning_rate": 8.683472349168982e-05, "loss": 3.0271, "step": 3350 }, { "epoch": 0.2748244939789905, "grad_norm": 0.5194646120071411, "learning_rate": 8.681638351385407e-05, "loss": 3.0006, "step": 3352 }, { "epoch": 0.27498847040737895, "grad_norm": 0.5386354923248291, "learning_rate": 8.679803271024729e-05, "loss": 3.0237, "step": 3354 }, { "epoch": 0.27515244683576734, "grad_norm": 0.5998788475990295, "learning_rate": 8.677967108626547e-05, "loss": 3.0206, "step": 3356 }, { "epoch": 0.2753164232641558, "grad_norm": 0.576684296131134, "learning_rate": 8.676129864730784e-05, "loss": 3.036, "step": 3358 }, { "epoch": 0.2754803996925442, "grad_norm": 0.5040565729141235, "learning_rate": 8.674291539877674e-05, "loss": 3.0111, "step": 3360 }, { "epoch": 0.2756443761209326, "grad_norm": 0.4546044170856476, "learning_rate": 8.672452134607772e-05, "loss": 3.0263, "step": 3362 }, { "epoch": 0.275808352549321, "grad_norm": 0.4776202440261841, "learning_rate": 8.670611649461953e-05, "loss": 3.0355, "step": 3364 }, { "epoch": 0.27597232897770946, "grad_norm": 0.5171165466308594, "learning_rate": 8.668770084981408e-05, "loss": 3.0517, "step": 3366 }, { "epoch": 0.27613630540609785, "grad_norm": 0.613608181476593, "learning_rate": 8.666927441707639e-05, "loss": 3.0085, "step": 3368 }, { "epoch": 0.2763002818344863, "grad_norm": 0.5442901849746704, "learning_rate": 8.665083720182479e-05, "loss": 3.0408, "step": 3370 }, { "epoch": 0.2764642582628747, "grad_norm": 0.4858104884624481, "learning_rate": 8.663238920948065e-05, "loss": 3.0797, "step": 3372 }, { "epoch": 0.27662823469126313, "grad_norm": 0.599068284034729, "learning_rate": 8.661393044546855e-05, "loss": 3.0034, "step": 3374 }, { "epoch": 0.2767922111196516, "grad_norm": 0.6478520631790161, "learning_rate": 8.659546091521628e-05, "loss": 3.0538, "step": 3376 }, { "epoch": 0.27695618754803997, "grad_norm": 0.5216221809387207, "learning_rate": 8.657698062415473e-05, "loss": 2.9927, "step": 3378 }, { "epoch": 0.2771201639764284, "grad_norm": 0.49026909470558167, "learning_rate": 8.655848957771801e-05, "loss": 2.9977, "step": 3380 }, { "epoch": 0.2772841404048168, "grad_norm": 0.5494085550308228, "learning_rate": 8.653998778134338e-05, "loss": 3.0324, "step": 3382 }, { "epoch": 0.27744811683320525, "grad_norm": 0.4823310375213623, "learning_rate": 8.652147524047121e-05, "loss": 3.0145, "step": 3384 }, { "epoch": 0.27761209326159364, "grad_norm": 0.5301584005355835, "learning_rate": 8.65029519605451e-05, "loss": 3.073, "step": 3386 }, { "epoch": 0.2777760696899821, "grad_norm": 0.5409163236618042, "learning_rate": 8.648441794701176e-05, "loss": 3.0718, "step": 3388 }, { "epoch": 0.2779400461183705, "grad_norm": 0.542854905128479, "learning_rate": 8.646587320532109e-05, "loss": 3.0202, "step": 3390 }, { "epoch": 0.2781040225467589, "grad_norm": 0.5100975036621094, "learning_rate": 8.644731774092611e-05, "loss": 3.0215, "step": 3392 }, { "epoch": 0.2782679989751473, "grad_norm": 0.4922786355018616, "learning_rate": 8.642875155928302e-05, "loss": 2.9681, "step": 3394 }, { "epoch": 0.27843197540353576, "grad_norm": 0.4704815149307251, "learning_rate": 8.641017466585115e-05, "loss": 2.9829, "step": 3396 }, { "epoch": 0.27859595183192415, "grad_norm": 0.4583336114883423, "learning_rate": 8.639158706609301e-05, "loss": 2.9813, "step": 3398 }, { "epoch": 0.2787599282603126, "grad_norm": 0.46999305486679077, "learning_rate": 8.637298876547423e-05, "loss": 3.0119, "step": 3400 }, { "epoch": 0.278923904688701, "grad_norm": 0.4925120770931244, "learning_rate": 8.63543797694636e-05, "loss": 3.0416, "step": 3402 }, { "epoch": 0.27908788111708943, "grad_norm": 0.44865623116493225, "learning_rate": 8.633576008353307e-05, "loss": 3.0126, "step": 3404 }, { "epoch": 0.2792518575454778, "grad_norm": 0.4903569519519806, "learning_rate": 8.631712971315769e-05, "loss": 2.993, "step": 3406 }, { "epoch": 0.27941583397386627, "grad_norm": 0.5053166747093201, "learning_rate": 8.629848866381566e-05, "loss": 3.0154, "step": 3408 }, { "epoch": 0.27957981040225466, "grad_norm": 0.5207493305206299, "learning_rate": 8.627983694098836e-05, "loss": 3.0152, "step": 3410 }, { "epoch": 0.2797437868306431, "grad_norm": 0.4896704852581024, "learning_rate": 8.626117455016029e-05, "loss": 3.0206, "step": 3412 }, { "epoch": 0.2799077632590315, "grad_norm": 0.43985670804977417, "learning_rate": 8.62425014968191e-05, "loss": 3.0113, "step": 3414 }, { "epoch": 0.28007173968741994, "grad_norm": 0.4357748031616211, "learning_rate": 8.62238177864555e-05, "loss": 2.9796, "step": 3416 }, { "epoch": 0.28023571611580833, "grad_norm": 0.5827761888504028, "learning_rate": 8.620512342456344e-05, "loss": 3.0356, "step": 3418 }, { "epoch": 0.2803996925441968, "grad_norm": 0.5295268297195435, "learning_rate": 8.618641841663995e-05, "loss": 3.0138, "step": 3420 }, { "epoch": 0.28056366897258517, "grad_norm": 0.5759023427963257, "learning_rate": 8.616770276818515e-05, "loss": 3.0662, "step": 3422 }, { "epoch": 0.2807276454009736, "grad_norm": 0.5547720789909363, "learning_rate": 8.61489764847024e-05, "loss": 2.9373, "step": 3424 }, { "epoch": 0.280891621829362, "grad_norm": 0.5790712833404541, "learning_rate": 8.613023957169805e-05, "loss": 2.9677, "step": 3426 }, { "epoch": 0.28105559825775045, "grad_norm": 0.5202155709266663, "learning_rate": 8.611149203468169e-05, "loss": 2.9925, "step": 3428 }, { "epoch": 0.28121957468613884, "grad_norm": 0.48871245980262756, "learning_rate": 8.609273387916599e-05, "loss": 3.0403, "step": 3430 }, { "epoch": 0.2813835511145273, "grad_norm": 0.5515206456184387, "learning_rate": 8.607396511066672e-05, "loss": 3.0231, "step": 3432 }, { "epoch": 0.28154752754291573, "grad_norm": 0.5459672212600708, "learning_rate": 8.605518573470281e-05, "loss": 2.9938, "step": 3434 }, { "epoch": 0.2817115039713041, "grad_norm": 0.5675147771835327, "learning_rate": 8.603639575679627e-05, "loss": 3.004, "step": 3436 }, { "epoch": 0.28187548039969257, "grad_norm": 0.5069471597671509, "learning_rate": 8.601759518247228e-05, "loss": 3.0038, "step": 3438 }, { "epoch": 0.28203945682808096, "grad_norm": 0.4854438900947571, "learning_rate": 8.599878401725907e-05, "loss": 3.0183, "step": 3440 }, { "epoch": 0.2822034332564694, "grad_norm": 0.50712651014328, "learning_rate": 8.597996226668803e-05, "loss": 3.0431, "step": 3442 }, { "epoch": 0.2823674096848578, "grad_norm": 0.5265794396400452, "learning_rate": 8.596112993629368e-05, "loss": 3.025, "step": 3444 }, { "epoch": 0.28253138611324624, "grad_norm": 0.5010355710983276, "learning_rate": 8.594228703161358e-05, "loss": 3.0151, "step": 3446 }, { "epoch": 0.28269536254163463, "grad_norm": 0.5020803809165955, "learning_rate": 8.592343355818848e-05, "loss": 3.0397, "step": 3448 }, { "epoch": 0.2828593389700231, "grad_norm": 0.5143887996673584, "learning_rate": 8.590456952156216e-05, "loss": 3.0433, "step": 3450 }, { "epoch": 0.28302331539841147, "grad_norm": 0.46669018268585205, "learning_rate": 8.588569492728158e-05, "loss": 2.9783, "step": 3452 }, { "epoch": 0.2831872918267999, "grad_norm": 0.5204837322235107, "learning_rate": 8.586680978089675e-05, "loss": 3.0218, "step": 3454 }, { "epoch": 0.2833512682551883, "grad_norm": 0.5977779030799866, "learning_rate": 8.584791408796081e-05, "loss": 2.9815, "step": 3456 }, { "epoch": 0.28351524468357675, "grad_norm": 0.4907354414463043, "learning_rate": 8.582900785403e-05, "loss": 3.0183, "step": 3458 }, { "epoch": 0.28367922111196514, "grad_norm": 0.4910911023616791, "learning_rate": 8.581009108466365e-05, "loss": 2.9656, "step": 3460 }, { "epoch": 0.2838431975403536, "grad_norm": 0.4608771502971649, "learning_rate": 8.579116378542418e-05, "loss": 3.0116, "step": 3462 }, { "epoch": 0.284007173968742, "grad_norm": 0.5098771452903748, "learning_rate": 8.577222596187713e-05, "loss": 2.9916, "step": 3464 }, { "epoch": 0.2841711503971304, "grad_norm": 0.4630301296710968, "learning_rate": 8.575327761959111e-05, "loss": 3.014, "step": 3466 }, { "epoch": 0.2843351268255188, "grad_norm": 0.4914288818836212, "learning_rate": 8.573431876413786e-05, "loss": 3.0314, "step": 3468 }, { "epoch": 0.28449910325390726, "grad_norm": 0.5059999227523804, "learning_rate": 8.571534940109215e-05, "loss": 2.9912, "step": 3470 }, { "epoch": 0.28466307968229565, "grad_norm": 0.48450663685798645, "learning_rate": 8.569636953603193e-05, "loss": 3.0338, "step": 3472 }, { "epoch": 0.2848270561106841, "grad_norm": 0.5315168499946594, "learning_rate": 8.567737917453814e-05, "loss": 3.0025, "step": 3474 }, { "epoch": 0.2849910325390725, "grad_norm": 0.6352331042289734, "learning_rate": 8.565837832219486e-05, "loss": 3.0111, "step": 3476 }, { "epoch": 0.28515500896746093, "grad_norm": 0.5802767276763916, "learning_rate": 8.563936698458924e-05, "loss": 2.9683, "step": 3478 }, { "epoch": 0.2853189853958493, "grad_norm": 0.49376073479652405, "learning_rate": 8.562034516731155e-05, "loss": 3.0061, "step": 3480 }, { "epoch": 0.28548296182423777, "grad_norm": 0.6267409920692444, "learning_rate": 8.560131287595508e-05, "loss": 3.0239, "step": 3482 }, { "epoch": 0.28564693825262616, "grad_norm": 0.4985465407371521, "learning_rate": 8.558227011611624e-05, "loss": 2.9816, "step": 3484 }, { "epoch": 0.2858109146810146, "grad_norm": 0.5097251534461975, "learning_rate": 8.55632168933945e-05, "loss": 3.0022, "step": 3486 }, { "epoch": 0.28597489110940305, "grad_norm": 0.4792355000972748, "learning_rate": 8.554415321339245e-05, "loss": 2.9994, "step": 3488 }, { "epoch": 0.28613886753779144, "grad_norm": 0.4744062125682831, "learning_rate": 8.552507908171567e-05, "loss": 3.0114, "step": 3490 }, { "epoch": 0.2863028439661799, "grad_norm": 0.5052275657653809, "learning_rate": 8.55059945039729e-05, "loss": 2.9496, "step": 3492 }, { "epoch": 0.2864668203945683, "grad_norm": 0.49946439266204834, "learning_rate": 8.548689948577589e-05, "loss": 3.0197, "step": 3494 }, { "epoch": 0.2866307968229567, "grad_norm": 0.5956889390945435, "learning_rate": 8.546779403273952e-05, "loss": 3.0002, "step": 3496 }, { "epoch": 0.2867947732513451, "grad_norm": 0.5495591163635254, "learning_rate": 8.544867815048166e-05, "loss": 2.9783, "step": 3498 }, { "epoch": 0.28695874967973356, "grad_norm": 0.5237768292427063, "learning_rate": 8.542955184462334e-05, "loss": 2.9502, "step": 3500 }, { "epoch": 0.28712272610812195, "grad_norm": 0.5177441835403442, "learning_rate": 8.541041512078856e-05, "loss": 3.031, "step": 3502 }, { "epoch": 0.2872867025365104, "grad_norm": 0.49246928095817566, "learning_rate": 8.539126798460443e-05, "loss": 3.0439, "step": 3504 }, { "epoch": 0.2874506789648988, "grad_norm": 0.4482136368751526, "learning_rate": 8.537211044170118e-05, "loss": 2.9854, "step": 3506 }, { "epoch": 0.28761465539328723, "grad_norm": 0.44845181703567505, "learning_rate": 8.535294249771195e-05, "loss": 3.0106, "step": 3508 }, { "epoch": 0.2877786318216756, "grad_norm": 0.4529576897621155, "learning_rate": 8.533376415827311e-05, "loss": 2.9691, "step": 3510 }, { "epoch": 0.28794260825006407, "grad_norm": 0.4587607979774475, "learning_rate": 8.531457542902397e-05, "loss": 3.0012, "step": 3512 }, { "epoch": 0.28810658467845246, "grad_norm": 0.46685096621513367, "learning_rate": 8.52953763156069e-05, "loss": 2.9333, "step": 3514 }, { "epoch": 0.2882705611068409, "grad_norm": 0.4905169904232025, "learning_rate": 8.527616682366743e-05, "loss": 3.0578, "step": 3516 }, { "epoch": 0.2884345375352293, "grad_norm": 0.42239660024642944, "learning_rate": 8.5256946958854e-05, "loss": 2.8983, "step": 3518 }, { "epoch": 0.28859851396361774, "grad_norm": 0.4612962007522583, "learning_rate": 8.523771672681819e-05, "loss": 3.015, "step": 3520 }, { "epoch": 0.28876249039200613, "grad_norm": 0.4948538541793823, "learning_rate": 8.521847613321461e-05, "loss": 3.0051, "step": 3522 }, { "epoch": 0.2889264668203946, "grad_norm": 0.48041197657585144, "learning_rate": 8.51992251837009e-05, "loss": 2.9799, "step": 3524 }, { "epoch": 0.28909044324878297, "grad_norm": 0.4689272344112396, "learning_rate": 8.517996388393776e-05, "loss": 2.9721, "step": 3526 }, { "epoch": 0.2892544196771714, "grad_norm": 0.49251675605773926, "learning_rate": 8.516069223958895e-05, "loss": 3.0053, "step": 3528 }, { "epoch": 0.2894183961055598, "grad_norm": 0.5214609503746033, "learning_rate": 8.514141025632121e-05, "loss": 2.9999, "step": 3530 }, { "epoch": 0.28958237253394825, "grad_norm": 0.5497042536735535, "learning_rate": 8.51221179398044e-05, "loss": 3.0013, "step": 3532 }, { "epoch": 0.28974634896233664, "grad_norm": 0.5290588140487671, "learning_rate": 8.510281529571135e-05, "loss": 2.9671, "step": 3534 }, { "epoch": 0.2899103253907251, "grad_norm": 0.5457175970077515, "learning_rate": 8.508350232971798e-05, "loss": 2.9757, "step": 3536 }, { "epoch": 0.2900743018191135, "grad_norm": 0.47880294919013977, "learning_rate": 8.506417904750321e-05, "loss": 3.0044, "step": 3538 }, { "epoch": 0.2902382782475019, "grad_norm": 0.454750120639801, "learning_rate": 8.504484545474902e-05, "loss": 2.9854, "step": 3540 }, { "epoch": 0.2904022546758903, "grad_norm": 0.5449289679527283, "learning_rate": 8.502550155714039e-05, "loss": 3.0371, "step": 3542 }, { "epoch": 0.29056623110427876, "grad_norm": 0.4773004651069641, "learning_rate": 8.500614736036536e-05, "loss": 3.0289, "step": 3544 }, { "epoch": 0.2907302075326672, "grad_norm": 0.515762984752655, "learning_rate": 8.498678287011497e-05, "loss": 3.0323, "step": 3546 }, { "epoch": 0.2908941839610556, "grad_norm": 0.502001941204071, "learning_rate": 8.496740809208332e-05, "loss": 3.0344, "step": 3548 }, { "epoch": 0.29105816038944404, "grad_norm": 0.46195709705352783, "learning_rate": 8.494802303196751e-05, "loss": 2.9701, "step": 3550 }, { "epoch": 0.29122213681783243, "grad_norm": 0.43657612800598145, "learning_rate": 8.492862769546768e-05, "loss": 2.9849, "step": 3552 }, { "epoch": 0.2913861132462209, "grad_norm": 0.4737352132797241, "learning_rate": 8.490922208828698e-05, "loss": 3.0034, "step": 3554 }, { "epoch": 0.29155008967460927, "grad_norm": 0.47753196954727173, "learning_rate": 8.488980621613157e-05, "loss": 2.9693, "step": 3556 }, { "epoch": 0.2917140661029977, "grad_norm": 0.4862592816352844, "learning_rate": 8.487038008471066e-05, "loss": 3.0158, "step": 3558 }, { "epoch": 0.2918780425313861, "grad_norm": 0.5238935351371765, "learning_rate": 8.485094369973644e-05, "loss": 3.0186, "step": 3560 }, { "epoch": 0.29204201895977455, "grad_norm": 0.47579601407051086, "learning_rate": 8.483149706692415e-05, "loss": 2.9984, "step": 3562 }, { "epoch": 0.29220599538816294, "grad_norm": 0.45539939403533936, "learning_rate": 8.481204019199203e-05, "loss": 3.0183, "step": 3564 }, { "epoch": 0.2923699718165514, "grad_norm": 0.4498404860496521, "learning_rate": 8.479257308066129e-05, "loss": 2.9848, "step": 3566 }, { "epoch": 0.2925339482449398, "grad_norm": 0.5307414531707764, "learning_rate": 8.477309573865623e-05, "loss": 3.0112, "step": 3568 }, { "epoch": 0.2926979246733282, "grad_norm": 0.4621909558773041, "learning_rate": 8.47536081717041e-05, "loss": 3.0196, "step": 3570 }, { "epoch": 0.2928619011017166, "grad_norm": 0.4816957712173462, "learning_rate": 8.473411038553518e-05, "loss": 3.037, "step": 3572 }, { "epoch": 0.29302587753010506, "grad_norm": 0.4664321541786194, "learning_rate": 8.471460238588275e-05, "loss": 2.9409, "step": 3574 }, { "epoch": 0.29318985395849345, "grad_norm": 0.5383502244949341, "learning_rate": 8.469508417848309e-05, "loss": 3.0063, "step": 3576 }, { "epoch": 0.2933538303868819, "grad_norm": 0.5679525136947632, "learning_rate": 8.467555576907547e-05, "loss": 2.9717, "step": 3578 }, { "epoch": 0.2935178068152703, "grad_norm": 0.5316475629806519, "learning_rate": 8.465601716340217e-05, "loss": 2.9873, "step": 3580 }, { "epoch": 0.29368178324365873, "grad_norm": 0.5575289726257324, "learning_rate": 8.46364683672085e-05, "loss": 3.0346, "step": 3582 }, { "epoch": 0.2938457596720471, "grad_norm": 0.49023500084877014, "learning_rate": 8.461690938624272e-05, "loss": 2.9569, "step": 3584 }, { "epoch": 0.29400973610043557, "grad_norm": 0.5434648394584656, "learning_rate": 8.45973402262561e-05, "loss": 2.9681, "step": 3586 }, { "epoch": 0.29417371252882396, "grad_norm": 0.5375614166259766, "learning_rate": 8.45777608930029e-05, "loss": 2.9967, "step": 3588 }, { "epoch": 0.2943376889572124, "grad_norm": 0.492756724357605, "learning_rate": 8.455817139224038e-05, "loss": 2.9579, "step": 3590 }, { "epoch": 0.2945016653856008, "grad_norm": 0.5231446623802185, "learning_rate": 8.453857172972881e-05, "loss": 3.0131, "step": 3592 }, { "epoch": 0.29466564181398924, "grad_norm": 0.47618967294692993, "learning_rate": 8.451896191123139e-05, "loss": 2.9707, "step": 3594 }, { "epoch": 0.29482961824237763, "grad_norm": 0.4701117277145386, "learning_rate": 8.449934194251436e-05, "loss": 2.9816, "step": 3596 }, { "epoch": 0.2949935946707661, "grad_norm": 0.5265464782714844, "learning_rate": 8.447971182934695e-05, "loss": 2.9723, "step": 3598 }, { "epoch": 0.29515757109915447, "grad_norm": 0.4834996461868286, "learning_rate": 8.446007157750129e-05, "loss": 3.0243, "step": 3600 }, { "epoch": 0.2953215475275429, "grad_norm": 0.5081709623336792, "learning_rate": 8.444042119275259e-05, "loss": 2.991, "step": 3602 }, { "epoch": 0.29548552395593136, "grad_norm": 0.4928586781024933, "learning_rate": 8.4420760680879e-05, "loss": 2.9471, "step": 3604 }, { "epoch": 0.29564950038431975, "grad_norm": 0.4916239380836487, "learning_rate": 8.440109004766164e-05, "loss": 3.0252, "step": 3606 }, { "epoch": 0.2958134768127082, "grad_norm": 0.5131044387817383, "learning_rate": 8.438140929888461e-05, "loss": 3.037, "step": 3608 }, { "epoch": 0.2959774532410966, "grad_norm": 0.5201687812805176, "learning_rate": 8.436171844033498e-05, "loss": 3.0245, "step": 3610 }, { "epoch": 0.29614142966948503, "grad_norm": 0.5022949576377869, "learning_rate": 8.434201747780281e-05, "loss": 2.9876, "step": 3612 }, { "epoch": 0.2963054060978734, "grad_norm": 0.48507484793663025, "learning_rate": 8.432230641708112e-05, "loss": 3.0006, "step": 3614 }, { "epoch": 0.29646938252626187, "grad_norm": 0.5075755715370178, "learning_rate": 8.43025852639659e-05, "loss": 2.9571, "step": 3616 }, { "epoch": 0.29663335895465026, "grad_norm": 0.5907678604125977, "learning_rate": 8.428285402425614e-05, "loss": 2.988, "step": 3618 }, { "epoch": 0.2967973353830387, "grad_norm": 0.5069214105606079, "learning_rate": 8.426311270375372e-05, "loss": 2.997, "step": 3620 }, { "epoch": 0.2969613118114271, "grad_norm": 0.48910194635391235, "learning_rate": 8.424336130826354e-05, "loss": 3.0606, "step": 3622 }, { "epoch": 0.29712528823981554, "grad_norm": 0.47257429361343384, "learning_rate": 8.422359984359347e-05, "loss": 2.9472, "step": 3624 }, { "epoch": 0.29728926466820393, "grad_norm": 0.513735830783844, "learning_rate": 8.42038283155543e-05, "loss": 2.9971, "step": 3626 }, { "epoch": 0.2974532410965924, "grad_norm": 0.4457564651966095, "learning_rate": 8.41840467299598e-05, "loss": 2.99, "step": 3628 }, { "epoch": 0.29761721752498077, "grad_norm": 0.46787014603614807, "learning_rate": 8.416425509262673e-05, "loss": 3.04, "step": 3630 }, { "epoch": 0.2977811939533692, "grad_norm": 0.4722326397895813, "learning_rate": 8.414445340937474e-05, "loss": 2.9925, "step": 3632 }, { "epoch": 0.2979451703817576, "grad_norm": 0.48793745040893555, "learning_rate": 8.41246416860265e-05, "loss": 2.9601, "step": 3634 }, { "epoch": 0.29810914681014605, "grad_norm": 0.506376326084137, "learning_rate": 8.410481992840757e-05, "loss": 2.9581, "step": 3636 }, { "epoch": 0.29827312323853444, "grad_norm": 0.48079127073287964, "learning_rate": 8.408498814234649e-05, "loss": 2.9846, "step": 3638 }, { "epoch": 0.2984370996669229, "grad_norm": 0.49062538146972656, "learning_rate": 8.406514633367478e-05, "loss": 2.9784, "step": 3640 }, { "epoch": 0.2986010760953113, "grad_norm": 0.4508991837501526, "learning_rate": 8.404529450822687e-05, "loss": 2.9467, "step": 3642 }, { "epoch": 0.2987650525236997, "grad_norm": 0.49012279510498047, "learning_rate": 8.402543267184012e-05, "loss": 3.0089, "step": 3644 }, { "epoch": 0.2989290289520881, "grad_norm": 0.48352885246276855, "learning_rate": 8.400556083035487e-05, "loss": 2.9665, "step": 3646 }, { "epoch": 0.29909300538047656, "grad_norm": 0.48871710896492004, "learning_rate": 8.39856789896144e-05, "loss": 2.9501, "step": 3648 }, { "epoch": 0.29925698180886495, "grad_norm": 0.5270124673843384, "learning_rate": 8.396578715546489e-05, "loss": 2.9884, "step": 3650 }, { "epoch": 0.2994209582372534, "grad_norm": 0.5761973261833191, "learning_rate": 8.394588533375551e-05, "loss": 2.9354, "step": 3652 }, { "epoch": 0.2995849346656418, "grad_norm": 0.5598081946372986, "learning_rate": 8.392597353033836e-05, "loss": 2.9783, "step": 3654 }, { "epoch": 0.29974891109403023, "grad_norm": 0.6018843054771423, "learning_rate": 8.39060517510684e-05, "loss": 2.9262, "step": 3656 }, { "epoch": 0.2999128875224186, "grad_norm": 0.5428453683853149, "learning_rate": 8.388612000180365e-05, "loss": 2.9805, "step": 3658 }, { "epoch": 0.30007686395080707, "grad_norm": 0.5947820544242859, "learning_rate": 8.386617828840495e-05, "loss": 3.0226, "step": 3660 }, { "epoch": 0.3002408403791955, "grad_norm": 0.5261856913566589, "learning_rate": 8.384622661673613e-05, "loss": 3.0118, "step": 3662 }, { "epoch": 0.3004048168075839, "grad_norm": 0.5318114757537842, "learning_rate": 8.382626499266393e-05, "loss": 3.0048, "step": 3664 }, { "epoch": 0.30056879323597235, "grad_norm": 0.5498486161231995, "learning_rate": 8.380629342205802e-05, "loss": 3.0076, "step": 3666 }, { "epoch": 0.30073276966436074, "grad_norm": 0.5416869521141052, "learning_rate": 8.378631191079098e-05, "loss": 3.0086, "step": 3668 }, { "epoch": 0.3008967460927492, "grad_norm": 0.5416050553321838, "learning_rate": 8.376632046473836e-05, "loss": 3.0309, "step": 3670 }, { "epoch": 0.3010607225211376, "grad_norm": 0.5648167133331299, "learning_rate": 8.374631908977856e-05, "loss": 3.0105, "step": 3672 }, { "epoch": 0.301224698949526, "grad_norm": 0.5651098489761353, "learning_rate": 8.372630779179295e-05, "loss": 2.9895, "step": 3674 }, { "epoch": 0.3013886753779144, "grad_norm": 0.5542349219322205, "learning_rate": 8.370628657666581e-05, "loss": 2.9952, "step": 3676 }, { "epoch": 0.30155265180630286, "grad_norm": 0.5925988554954529, "learning_rate": 8.368625545028435e-05, "loss": 2.9552, "step": 3678 }, { "epoch": 0.30171662823469125, "grad_norm": 0.5026484131813049, "learning_rate": 8.366621441853864e-05, "loss": 2.9809, "step": 3680 }, { "epoch": 0.3018806046630797, "grad_norm": 0.54961097240448, "learning_rate": 8.36461634873217e-05, "loss": 3.0241, "step": 3682 }, { "epoch": 0.3020445810914681, "grad_norm": 0.539864182472229, "learning_rate": 8.362610266252948e-05, "loss": 2.9108, "step": 3684 }, { "epoch": 0.30220855751985654, "grad_norm": 0.5144716501235962, "learning_rate": 8.360603195006081e-05, "loss": 2.9453, "step": 3686 }, { "epoch": 0.3023725339482449, "grad_norm": 0.5098161697387695, "learning_rate": 8.358595135581746e-05, "loss": 2.9514, "step": 3688 }, { "epoch": 0.30253651037663337, "grad_norm": 0.5552378296852112, "learning_rate": 8.356586088570402e-05, "loss": 2.9992, "step": 3690 }, { "epoch": 0.30270048680502176, "grad_norm": 0.5220159888267517, "learning_rate": 8.354576054562812e-05, "loss": 2.949, "step": 3692 }, { "epoch": 0.3028644632334102, "grad_norm": 0.5612363219261169, "learning_rate": 8.352565034150015e-05, "loss": 2.9787, "step": 3694 }, { "epoch": 0.3030284396617986, "grad_norm": 0.620829701423645, "learning_rate": 8.350553027923354e-05, "loss": 2.948, "step": 3696 }, { "epoch": 0.30319241609018704, "grad_norm": 0.5301763415336609, "learning_rate": 8.348540036474445e-05, "loss": 2.899, "step": 3698 }, { "epoch": 0.30335639251857544, "grad_norm": 0.5472487807273865, "learning_rate": 8.346526060395214e-05, "loss": 2.9334, "step": 3700 }, { "epoch": 0.3035203689469639, "grad_norm": 0.4763239920139313, "learning_rate": 8.34451110027786e-05, "loss": 2.9492, "step": 3702 }, { "epoch": 0.30368434537535227, "grad_norm": 0.4982868432998657, "learning_rate": 8.342495156714877e-05, "loss": 2.9982, "step": 3704 }, { "epoch": 0.3038483218037407, "grad_norm": 0.4549109935760498, "learning_rate": 8.34047823029905e-05, "loss": 2.9983, "step": 3706 }, { "epoch": 0.3040122982321291, "grad_norm": 0.4498273730278015, "learning_rate": 8.338460321623453e-05, "loss": 2.9843, "step": 3708 }, { "epoch": 0.30417627466051755, "grad_norm": 0.4424727261066437, "learning_rate": 8.336441431281443e-05, "loss": 2.9798, "step": 3710 }, { "epoch": 0.30434025108890594, "grad_norm": 0.4585806131362915, "learning_rate": 8.334421559866675e-05, "loss": 2.9663, "step": 3712 }, { "epoch": 0.3045042275172944, "grad_norm": 0.4909418523311615, "learning_rate": 8.332400707973082e-05, "loss": 2.9489, "step": 3714 }, { "epoch": 0.3046682039456828, "grad_norm": 0.49454036355018616, "learning_rate": 8.330378876194896e-05, "loss": 2.9942, "step": 3716 }, { "epoch": 0.3048321803740712, "grad_norm": 0.4984138011932373, "learning_rate": 8.328356065126628e-05, "loss": 2.9846, "step": 3718 }, { "epoch": 0.3049961568024597, "grad_norm": 0.5394349694252014, "learning_rate": 8.326332275363085e-05, "loss": 2.9815, "step": 3720 }, { "epoch": 0.30516013323084806, "grad_norm": 0.5089222192764282, "learning_rate": 8.324307507499352e-05, "loss": 3.0284, "step": 3722 }, { "epoch": 0.3053241096592365, "grad_norm": 0.5222364664077759, "learning_rate": 8.322281762130813e-05, "loss": 3.0041, "step": 3724 }, { "epoch": 0.3054880860876249, "grad_norm": 0.5485755801200867, "learning_rate": 8.320255039853128e-05, "loss": 2.9494, "step": 3726 }, { "epoch": 0.30565206251601335, "grad_norm": 0.664840579032898, "learning_rate": 8.318227341262254e-05, "loss": 2.9965, "step": 3728 }, { "epoch": 0.30581603894440174, "grad_norm": 0.5262017846107483, "learning_rate": 8.316198666954431e-05, "loss": 2.9494, "step": 3730 }, { "epoch": 0.3059800153727902, "grad_norm": 0.504371166229248, "learning_rate": 8.314169017526185e-05, "loss": 2.9769, "step": 3732 }, { "epoch": 0.30614399180117857, "grad_norm": 0.4443201720714569, "learning_rate": 8.312138393574328e-05, "loss": 2.9735, "step": 3734 }, { "epoch": 0.306307968229567, "grad_norm": 0.4749385714530945, "learning_rate": 8.310106795695962e-05, "loss": 2.9146, "step": 3736 }, { "epoch": 0.3064719446579554, "grad_norm": 0.4916543662548065, "learning_rate": 8.308074224488473e-05, "loss": 3.0123, "step": 3738 }, { "epoch": 0.30663592108634385, "grad_norm": 0.47765418887138367, "learning_rate": 8.306040680549534e-05, "loss": 2.9993, "step": 3740 }, { "epoch": 0.30679989751473224, "grad_norm": 0.4428040385246277, "learning_rate": 8.304006164477105e-05, "loss": 2.9951, "step": 3742 }, { "epoch": 0.3069638739431207, "grad_norm": 0.48747119307518005, "learning_rate": 8.301970676869428e-05, "loss": 2.9786, "step": 3744 }, { "epoch": 0.3071278503715091, "grad_norm": 0.47519078850746155, "learning_rate": 8.299934218325036e-05, "loss": 3.0331, "step": 3746 }, { "epoch": 0.3072918267998975, "grad_norm": 0.5639200806617737, "learning_rate": 8.297896789442742e-05, "loss": 2.9629, "step": 3748 }, { "epoch": 0.3074558032282859, "grad_norm": 0.4715816080570221, "learning_rate": 8.295858390821651e-05, "loss": 2.9719, "step": 3750 }, { "epoch": 0.30761977965667436, "grad_norm": 0.46770063042640686, "learning_rate": 8.293819023061145e-05, "loss": 2.934, "step": 3752 }, { "epoch": 0.30778375608506275, "grad_norm": 0.47951412200927734, "learning_rate": 8.291778686760899e-05, "loss": 2.9206, "step": 3754 }, { "epoch": 0.3079477325134512, "grad_norm": 0.5599160194396973, "learning_rate": 8.289737382520868e-05, "loss": 2.9962, "step": 3756 }, { "epoch": 0.3081117089418396, "grad_norm": 0.4405677318572998, "learning_rate": 8.287695110941292e-05, "loss": 2.9984, "step": 3758 }, { "epoch": 0.30827568537022804, "grad_norm": 0.4612639844417572, "learning_rate": 8.285651872622695e-05, "loss": 2.9552, "step": 3760 }, { "epoch": 0.3084396617986164, "grad_norm": 0.5191618800163269, "learning_rate": 8.283607668165888e-05, "loss": 2.9771, "step": 3762 }, { "epoch": 0.3086036382270049, "grad_norm": 0.4907604157924652, "learning_rate": 8.281562498171964e-05, "loss": 2.9342, "step": 3764 }, { "epoch": 0.30876761465539326, "grad_norm": 0.46844518184661865, "learning_rate": 8.279516363242302e-05, "loss": 2.97, "step": 3766 }, { "epoch": 0.3089315910837817, "grad_norm": 0.5155321955680847, "learning_rate": 8.277469263978562e-05, "loss": 2.9505, "step": 3768 }, { "epoch": 0.3090955675121701, "grad_norm": 0.4812948405742645, "learning_rate": 8.275421200982689e-05, "loss": 2.9261, "step": 3770 }, { "epoch": 0.30925954394055855, "grad_norm": 0.4431648254394531, "learning_rate": 8.273372174856911e-05, "loss": 3.0066, "step": 3772 }, { "epoch": 0.309423520368947, "grad_norm": 0.5174999237060547, "learning_rate": 8.271322186203739e-05, "loss": 2.9845, "step": 3774 }, { "epoch": 0.3095874967973354, "grad_norm": 0.5325742363929749, "learning_rate": 8.269271235625965e-05, "loss": 2.891, "step": 3776 }, { "epoch": 0.30975147322572383, "grad_norm": 0.49371999502182007, "learning_rate": 8.26721932372667e-05, "loss": 2.9165, "step": 3778 }, { "epoch": 0.3099154496541122, "grad_norm": 0.46111825108528137, "learning_rate": 8.265166451109213e-05, "loss": 2.9865, "step": 3780 }, { "epoch": 0.31007942608250066, "grad_norm": 0.49692320823669434, "learning_rate": 8.263112618377235e-05, "loss": 2.8851, "step": 3782 }, { "epoch": 0.31024340251088905, "grad_norm": 0.5122151970863342, "learning_rate": 8.261057826134664e-05, "loss": 3.0221, "step": 3784 }, { "epoch": 0.3104073789392775, "grad_norm": 0.4640101194381714, "learning_rate": 8.259002074985703e-05, "loss": 3.0407, "step": 3786 }, { "epoch": 0.3105713553676659, "grad_norm": 0.4947657585144043, "learning_rate": 8.256945365534841e-05, "loss": 2.9391, "step": 3788 }, { "epoch": 0.31073533179605434, "grad_norm": 0.5168522596359253, "learning_rate": 8.254887698386851e-05, "loss": 2.9593, "step": 3790 }, { "epoch": 0.3108993082244427, "grad_norm": 0.5186290740966797, "learning_rate": 8.252829074146784e-05, "loss": 3.0004, "step": 3792 }, { "epoch": 0.3110632846528312, "grad_norm": 0.5075885653495789, "learning_rate": 8.250769493419973e-05, "loss": 2.9619, "step": 3794 }, { "epoch": 0.31122726108121956, "grad_norm": 0.49640703201293945, "learning_rate": 8.248708956812033e-05, "loss": 3.0015, "step": 3796 }, { "epoch": 0.311391237509608, "grad_norm": 0.4813206195831299, "learning_rate": 8.246647464928863e-05, "loss": 2.9537, "step": 3798 }, { "epoch": 0.3115552139379964, "grad_norm": 0.48675093054771423, "learning_rate": 8.244585018376634e-05, "loss": 2.9733, "step": 3800 }, { "epoch": 0.31171919036638485, "grad_norm": 0.48205146193504333, "learning_rate": 8.24252161776181e-05, "loss": 2.9532, "step": 3802 }, { "epoch": 0.31188316679477324, "grad_norm": 0.4845637381076813, "learning_rate": 8.240457263691123e-05, "loss": 2.9559, "step": 3804 }, { "epoch": 0.3120471432231617, "grad_norm": 0.4753011465072632, "learning_rate": 8.238391956771595e-05, "loss": 2.8951, "step": 3806 }, { "epoch": 0.3122111196515501, "grad_norm": 0.47336089611053467, "learning_rate": 8.236325697610526e-05, "loss": 2.8971, "step": 3808 }, { "epoch": 0.3123750960799385, "grad_norm": 0.539230227470398, "learning_rate": 8.23425848681549e-05, "loss": 2.9824, "step": 3810 }, { "epoch": 0.3125390725083269, "grad_norm": 0.49993154406547546, "learning_rate": 8.232190324994348e-05, "loss": 2.9683, "step": 3812 }, { "epoch": 0.31270304893671536, "grad_norm": 0.46976861357688904, "learning_rate": 8.23012121275524e-05, "loss": 2.9599, "step": 3814 }, { "epoch": 0.31286702536510375, "grad_norm": 0.47436973452568054, "learning_rate": 8.228051150706582e-05, "loss": 2.9725, "step": 3816 }, { "epoch": 0.3130310017934922, "grad_norm": 0.4988282024860382, "learning_rate": 8.22598013945707e-05, "loss": 2.9868, "step": 3818 }, { "epoch": 0.3131949782218806, "grad_norm": 0.4894435405731201, "learning_rate": 8.223908179615681e-05, "loss": 2.936, "step": 3820 }, { "epoch": 0.31335895465026903, "grad_norm": 0.45384493470191956, "learning_rate": 8.22183527179167e-05, "loss": 2.9694, "step": 3822 }, { "epoch": 0.3135229310786574, "grad_norm": 0.4598720967769623, "learning_rate": 8.219761416594569e-05, "loss": 2.9709, "step": 3824 }, { "epoch": 0.31368690750704586, "grad_norm": 0.47938069701194763, "learning_rate": 8.217686614634194e-05, "loss": 2.9953, "step": 3826 }, { "epoch": 0.31385088393543426, "grad_norm": 0.473412424325943, "learning_rate": 8.215610866520631e-05, "loss": 3.0166, "step": 3828 }, { "epoch": 0.3140148603638227, "grad_norm": 0.4905776381492615, "learning_rate": 8.213534172864252e-05, "loss": 2.899, "step": 3830 }, { "epoch": 0.31417883679221115, "grad_norm": 0.4890548288822174, "learning_rate": 8.211456534275702e-05, "loss": 2.9694, "step": 3832 }, { "epoch": 0.31434281322059954, "grad_norm": 0.4220986068248749, "learning_rate": 8.209377951365908e-05, "loss": 2.9941, "step": 3834 }, { "epoch": 0.314506789648988, "grad_norm": 0.4571814239025116, "learning_rate": 8.207298424746071e-05, "loss": 2.987, "step": 3836 }, { "epoch": 0.3146707660773764, "grad_norm": 0.4578603208065033, "learning_rate": 8.20521795502767e-05, "loss": 3.0315, "step": 3838 }, { "epoch": 0.3148347425057648, "grad_norm": 0.49688267707824707, "learning_rate": 8.203136542822464e-05, "loss": 2.981, "step": 3840 }, { "epoch": 0.3149987189341532, "grad_norm": 0.4531402885913849, "learning_rate": 8.201054188742485e-05, "loss": 2.989, "step": 3842 }, { "epoch": 0.31516269536254166, "grad_norm": 0.4830644130706787, "learning_rate": 8.198970893400047e-05, "loss": 2.9532, "step": 3844 }, { "epoch": 0.31532667179093005, "grad_norm": 0.4996969997882843, "learning_rate": 8.196886657407739e-05, "loss": 2.968, "step": 3846 }, { "epoch": 0.3154906482193185, "grad_norm": 0.48447179794311523, "learning_rate": 8.194801481378421e-05, "loss": 2.9828, "step": 3848 }, { "epoch": 0.3156546246477069, "grad_norm": 0.4997907280921936, "learning_rate": 8.192715365925239e-05, "loss": 2.9826, "step": 3850 }, { "epoch": 0.31581860107609533, "grad_norm": 0.4326731860637665, "learning_rate": 8.190628311661607e-05, "loss": 2.9688, "step": 3852 }, { "epoch": 0.3159825775044837, "grad_norm": 0.45658349990844727, "learning_rate": 8.188540319201219e-05, "loss": 3.0064, "step": 3854 }, { "epoch": 0.31614655393287217, "grad_norm": 0.4541572034358978, "learning_rate": 8.186451389158045e-05, "loss": 3.0013, "step": 3856 }, { "epoch": 0.31631053036126056, "grad_norm": 0.4665530025959015, "learning_rate": 8.184361522146331e-05, "loss": 2.9502, "step": 3858 }, { "epoch": 0.316474506789649, "grad_norm": 0.4613116979598999, "learning_rate": 8.182270718780596e-05, "loss": 2.9217, "step": 3860 }, { "epoch": 0.3166384832180374, "grad_norm": 0.46976956725120544, "learning_rate": 8.180178979675635e-05, "loss": 2.9366, "step": 3862 }, { "epoch": 0.31680245964642584, "grad_norm": 0.5204563736915588, "learning_rate": 8.178086305446522e-05, "loss": 3.059, "step": 3864 }, { "epoch": 0.31696643607481423, "grad_norm": 0.5135278105735779, "learning_rate": 8.1759926967086e-05, "loss": 2.8777, "step": 3866 }, { "epoch": 0.3171304125032027, "grad_norm": 0.49763861298561096, "learning_rate": 8.173898154077491e-05, "loss": 2.9745, "step": 3868 }, { "epoch": 0.31729438893159106, "grad_norm": 0.4697003662586212, "learning_rate": 8.17180267816909e-05, "loss": 2.9959, "step": 3870 }, { "epoch": 0.3174583653599795, "grad_norm": 0.47999435663223267, "learning_rate": 8.169706269599567e-05, "loss": 2.9694, "step": 3872 }, { "epoch": 0.3176223417883679, "grad_norm": 0.47604313492774963, "learning_rate": 8.167608928985364e-05, "loss": 2.9576, "step": 3874 }, { "epoch": 0.31778631821675635, "grad_norm": 0.5373707413673401, "learning_rate": 8.165510656943203e-05, "loss": 2.9864, "step": 3876 }, { "epoch": 0.31795029464514474, "grad_norm": 0.47880983352661133, "learning_rate": 8.163411454090073e-05, "loss": 2.9901, "step": 3878 }, { "epoch": 0.3181142710735332, "grad_norm": 0.45610833168029785, "learning_rate": 8.161311321043241e-05, "loss": 2.9617, "step": 3880 }, { "epoch": 0.3182782475019216, "grad_norm": 0.47816580533981323, "learning_rate": 8.159210258420247e-05, "loss": 2.9654, "step": 3882 }, { "epoch": 0.31844222393031, "grad_norm": 0.45326411724090576, "learning_rate": 8.157108266838902e-05, "loss": 2.9668, "step": 3884 }, { "epoch": 0.3186062003586984, "grad_norm": 0.4615935683250427, "learning_rate": 8.15500534691729e-05, "loss": 2.9791, "step": 3886 }, { "epoch": 0.31877017678708686, "grad_norm": 0.439890593290329, "learning_rate": 8.152901499273774e-05, "loss": 3.0039, "step": 3888 }, { "epoch": 0.3189341532154753, "grad_norm": 0.4957211911678314, "learning_rate": 8.150796724526982e-05, "loss": 2.9812, "step": 3890 }, { "epoch": 0.3190981296438637, "grad_norm": 0.49575918912887573, "learning_rate": 8.148691023295818e-05, "loss": 2.9451, "step": 3892 }, { "epoch": 0.31926210607225214, "grad_norm": 0.4739311635494232, "learning_rate": 8.14658439619946e-05, "loss": 2.9739, "step": 3894 }, { "epoch": 0.31942608250064053, "grad_norm": 0.4505142867565155, "learning_rate": 8.144476843857358e-05, "loss": 2.9556, "step": 3896 }, { "epoch": 0.319590058929029, "grad_norm": 0.507182776927948, "learning_rate": 8.14236836688923e-05, "loss": 2.9812, "step": 3898 }, { "epoch": 0.31975403535741737, "grad_norm": 0.5078046321868896, "learning_rate": 8.14025896591507e-05, "loss": 2.9509, "step": 3900 }, { "epoch": 0.3199180117858058, "grad_norm": 0.5385080575942993, "learning_rate": 8.138148641555143e-05, "loss": 2.9495, "step": 3902 }, { "epoch": 0.3200819882141942, "grad_norm": 0.508383572101593, "learning_rate": 8.136037394429982e-05, "loss": 2.9564, "step": 3904 }, { "epoch": 0.32024596464258265, "grad_norm": 0.4714992046356201, "learning_rate": 8.133925225160399e-05, "loss": 2.9736, "step": 3906 }, { "epoch": 0.32040994107097104, "grad_norm": 0.4709402918815613, "learning_rate": 8.13181213436747e-05, "loss": 2.9441, "step": 3908 }, { "epoch": 0.3205739174993595, "grad_norm": 0.4701831340789795, "learning_rate": 8.129698122672543e-05, "loss": 2.9284, "step": 3910 }, { "epoch": 0.3207378939277479, "grad_norm": 0.46206173300743103, "learning_rate": 8.12758319069724e-05, "loss": 2.9342, "step": 3912 }, { "epoch": 0.3209018703561363, "grad_norm": 0.6080299615859985, "learning_rate": 8.125467339063452e-05, "loss": 2.9444, "step": 3914 }, { "epoch": 0.3210658467845247, "grad_norm": 0.5376178026199341, "learning_rate": 8.123350568393338e-05, "loss": 2.964, "step": 3916 }, { "epoch": 0.32122982321291316, "grad_norm": 0.46582067012786865, "learning_rate": 8.12123287930933e-05, "loss": 2.9762, "step": 3918 }, { "epoch": 0.32139379964130155, "grad_norm": 0.4999258816242218, "learning_rate": 8.119114272434134e-05, "loss": 2.914, "step": 3920 }, { "epoch": 0.32155777606969, "grad_norm": 0.45092910528182983, "learning_rate": 8.116994748390714e-05, "loss": 2.9802, "step": 3922 }, { "epoch": 0.3217217524980784, "grad_norm": 0.4431985318660736, "learning_rate": 8.114874307802316e-05, "loss": 2.972, "step": 3924 }, { "epoch": 0.32188572892646683, "grad_norm": 0.49656012654304504, "learning_rate": 8.11275295129245e-05, "loss": 2.9692, "step": 3926 }, { "epoch": 0.3220497053548552, "grad_norm": 0.528873085975647, "learning_rate": 8.110630679484896e-05, "loss": 2.9603, "step": 3928 }, { "epoch": 0.32221368178324367, "grad_norm": 0.4573817849159241, "learning_rate": 8.1085074930037e-05, "loss": 2.9092, "step": 3930 }, { "epoch": 0.32237765821163206, "grad_norm": 0.536751925945282, "learning_rate": 8.106383392473185e-05, "loss": 2.9176, "step": 3932 }, { "epoch": 0.3225416346400205, "grad_norm": 0.5677569508552551, "learning_rate": 8.104258378517932e-05, "loss": 2.9718, "step": 3934 }, { "epoch": 0.3227056110684089, "grad_norm": 0.5534135103225708, "learning_rate": 8.1021324517628e-05, "loss": 2.9247, "step": 3936 }, { "epoch": 0.32286958749679734, "grad_norm": 0.48747488856315613, "learning_rate": 8.100005612832915e-05, "loss": 2.9566, "step": 3938 }, { "epoch": 0.32303356392518573, "grad_norm": 0.5087941884994507, "learning_rate": 8.097877862353664e-05, "loss": 2.9372, "step": 3940 }, { "epoch": 0.3231975403535742, "grad_norm": 0.5312908291816711, "learning_rate": 8.09574920095071e-05, "loss": 2.9734, "step": 3942 }, { "epoch": 0.32336151678196257, "grad_norm": 0.48324260115623474, "learning_rate": 8.09361962924998e-05, "loss": 2.9864, "step": 3944 }, { "epoch": 0.323525493210351, "grad_norm": 0.4854391813278198, "learning_rate": 8.09148914787767e-05, "loss": 3.0042, "step": 3946 }, { "epoch": 0.32368946963873946, "grad_norm": 0.4973825514316559, "learning_rate": 8.08935775746024e-05, "loss": 2.9871, "step": 3948 }, { "epoch": 0.32385344606712785, "grad_norm": 0.44147545099258423, "learning_rate": 8.087225458624425e-05, "loss": 2.8988, "step": 3950 }, { "epoch": 0.3240174224955163, "grad_norm": 0.462311714887619, "learning_rate": 8.085092251997221e-05, "loss": 2.9795, "step": 3952 }, { "epoch": 0.3241813989239047, "grad_norm": 0.5237289071083069, "learning_rate": 8.082958138205892e-05, "loss": 3.0, "step": 3954 }, { "epoch": 0.32434537535229313, "grad_norm": 0.47140932083129883, "learning_rate": 8.080823117877968e-05, "loss": 2.8735, "step": 3956 }, { "epoch": 0.3245093517806815, "grad_norm": 0.4941592514514923, "learning_rate": 8.078687191641248e-05, "loss": 2.9496, "step": 3958 }, { "epoch": 0.32467332820906997, "grad_norm": 0.5430725812911987, "learning_rate": 8.076550360123796e-05, "loss": 2.907, "step": 3960 }, { "epoch": 0.32483730463745836, "grad_norm": 0.5308889150619507, "learning_rate": 8.074412623953942e-05, "loss": 2.9895, "step": 3962 }, { "epoch": 0.3250012810658468, "grad_norm": 0.46821293234825134, "learning_rate": 8.072273983760282e-05, "loss": 2.9611, "step": 3964 }, { "epoch": 0.3251652574942352, "grad_norm": 0.4651613235473633, "learning_rate": 8.070134440171679e-05, "loss": 2.934, "step": 3966 }, { "epoch": 0.32532923392262364, "grad_norm": 0.5212334990501404, "learning_rate": 8.06799399381726e-05, "loss": 2.9629, "step": 3968 }, { "epoch": 0.32549321035101203, "grad_norm": 0.495918869972229, "learning_rate": 8.065852645326419e-05, "loss": 3.0066, "step": 3970 }, { "epoch": 0.3256571867794005, "grad_norm": 0.46576234698295593, "learning_rate": 8.063710395328812e-05, "loss": 2.9201, "step": 3972 }, { "epoch": 0.32582116320778887, "grad_norm": 0.495717316865921, "learning_rate": 8.061567244454368e-05, "loss": 2.9373, "step": 3974 }, { "epoch": 0.3259851396361773, "grad_norm": 0.4994613826274872, "learning_rate": 8.05942319333327e-05, "loss": 2.9653, "step": 3976 }, { "epoch": 0.3261491160645657, "grad_norm": 0.46741795539855957, "learning_rate": 8.057278242595974e-05, "loss": 2.9488, "step": 3978 }, { "epoch": 0.32631309249295415, "grad_norm": 0.5505892038345337, "learning_rate": 8.055132392873196e-05, "loss": 2.9583, "step": 3980 }, { "epoch": 0.32647706892134254, "grad_norm": 0.45577502250671387, "learning_rate": 8.052985644795918e-05, "loss": 2.8944, "step": 3982 }, { "epoch": 0.326641045349731, "grad_norm": 0.4663446843624115, "learning_rate": 8.050837998995389e-05, "loss": 2.9601, "step": 3984 }, { "epoch": 0.3268050217781194, "grad_norm": 0.5026230812072754, "learning_rate": 8.048689456103118e-05, "loss": 2.9505, "step": 3986 }, { "epoch": 0.3269689982065078, "grad_norm": 0.5132330060005188, "learning_rate": 8.046540016750877e-05, "loss": 2.9677, "step": 3988 }, { "epoch": 0.3271329746348962, "grad_norm": 0.45152610540390015, "learning_rate": 8.044389681570705e-05, "loss": 2.9187, "step": 3990 }, { "epoch": 0.32729695106328466, "grad_norm": 0.5075539350509644, "learning_rate": 8.042238451194905e-05, "loss": 3.0216, "step": 3992 }, { "epoch": 0.32746092749167305, "grad_norm": 0.44997990131378174, "learning_rate": 8.040086326256038e-05, "loss": 2.9747, "step": 3994 }, { "epoch": 0.3276249039200615, "grad_norm": 0.4499579966068268, "learning_rate": 8.037933307386932e-05, "loss": 2.9434, "step": 3996 }, { "epoch": 0.3277888803484499, "grad_norm": 0.5278234481811523, "learning_rate": 8.035779395220678e-05, "loss": 2.9539, "step": 3998 }, { "epoch": 0.32795285677683833, "grad_norm": 0.5585642457008362, "learning_rate": 8.03362459039063e-05, "loss": 2.9796, "step": 4000 }, { "epoch": 0.3281168332052267, "grad_norm": 0.6120083928108215, "learning_rate": 8.031468893530399e-05, "loss": 2.9489, "step": 4002 }, { "epoch": 0.32828080963361517, "grad_norm": 0.5297592878341675, "learning_rate": 8.029312305273868e-05, "loss": 2.9691, "step": 4004 }, { "epoch": 0.3284447860620036, "grad_norm": 0.5784056186676025, "learning_rate": 8.027154826255172e-05, "loss": 2.9629, "step": 4006 }, { "epoch": 0.328608762490392, "grad_norm": 0.5364445447921753, "learning_rate": 8.024996457108716e-05, "loss": 2.9363, "step": 4008 }, { "epoch": 0.32877273891878045, "grad_norm": 0.5013018250465393, "learning_rate": 8.022837198469162e-05, "loss": 2.9376, "step": 4010 }, { "epoch": 0.32893671534716884, "grad_norm": 0.5268305540084839, "learning_rate": 8.020677050971435e-05, "loss": 2.9699, "step": 4012 }, { "epoch": 0.3291006917755573, "grad_norm": 0.5022383332252502, "learning_rate": 8.018516015250721e-05, "loss": 2.9699, "step": 4014 }, { "epoch": 0.3292646682039457, "grad_norm": 0.4535817503929138, "learning_rate": 8.016354091942469e-05, "loss": 3.0048, "step": 4016 }, { "epoch": 0.3294286446323341, "grad_norm": 0.5175948739051819, "learning_rate": 8.014191281682384e-05, "loss": 2.974, "step": 4018 }, { "epoch": 0.3295926210607225, "grad_norm": 0.4674460291862488, "learning_rate": 8.012027585106439e-05, "loss": 2.9827, "step": 4020 }, { "epoch": 0.32975659748911096, "grad_norm": 0.4415198564529419, "learning_rate": 8.009863002850863e-05, "loss": 2.9546, "step": 4022 }, { "epoch": 0.32992057391749935, "grad_norm": 0.4976121187210083, "learning_rate": 8.007697535552143e-05, "loss": 2.9473, "step": 4024 }, { "epoch": 0.3300845503458878, "grad_norm": 0.475236177444458, "learning_rate": 8.005531183847035e-05, "loss": 2.9237, "step": 4026 }, { "epoch": 0.3302485267742762, "grad_norm": 0.437836617231369, "learning_rate": 8.003363948372547e-05, "loss": 2.9309, "step": 4028 }, { "epoch": 0.33041250320266463, "grad_norm": 0.4106907844543457, "learning_rate": 8.001195829765948e-05, "loss": 2.9151, "step": 4030 }, { "epoch": 0.330576479631053, "grad_norm": 0.4589502215385437, "learning_rate": 7.999026828664771e-05, "loss": 2.9092, "step": 4032 }, { "epoch": 0.33074045605944147, "grad_norm": 0.5295969247817993, "learning_rate": 7.996856945706804e-05, "loss": 2.9827, "step": 4034 }, { "epoch": 0.33090443248782986, "grad_norm": 0.46702131628990173, "learning_rate": 7.994686181530095e-05, "loss": 2.9661, "step": 4036 }, { "epoch": 0.3310684089162183, "grad_norm": 0.4186733365058899, "learning_rate": 7.992514536772954e-05, "loss": 2.9055, "step": 4038 }, { "epoch": 0.3312323853446067, "grad_norm": 0.46188080310821533, "learning_rate": 7.990342012073948e-05, "loss": 2.8955, "step": 4040 }, { "epoch": 0.33139636177299514, "grad_norm": 0.5356650352478027, "learning_rate": 7.988168608071901e-05, "loss": 2.9707, "step": 4042 }, { "epoch": 0.33156033820138353, "grad_norm": 0.5137851238250732, "learning_rate": 7.9859943254059e-05, "loss": 2.9509, "step": 4044 }, { "epoch": 0.331724314629772, "grad_norm": 0.4819590151309967, "learning_rate": 7.983819164715286e-05, "loss": 2.9202, "step": 4046 }, { "epoch": 0.33188829105816037, "grad_norm": 0.5087484121322632, "learning_rate": 7.98164312663966e-05, "loss": 2.9112, "step": 4048 }, { "epoch": 0.3320522674865488, "grad_norm": 0.49762898683547974, "learning_rate": 7.97946621181888e-05, "loss": 2.8998, "step": 4050 }, { "epoch": 0.3322162439149372, "grad_norm": 0.53311687707901, "learning_rate": 7.977288420893065e-05, "loss": 2.9138, "step": 4052 }, { "epoch": 0.33238022034332565, "grad_norm": 0.5476859211921692, "learning_rate": 7.975109754502588e-05, "loss": 2.9175, "step": 4054 }, { "epoch": 0.33254419677171404, "grad_norm": 0.5419086217880249, "learning_rate": 7.972930213288079e-05, "loss": 2.9735, "step": 4056 }, { "epoch": 0.3327081732001025, "grad_norm": 0.5330222845077515, "learning_rate": 7.970749797890432e-05, "loss": 2.9302, "step": 4058 }, { "epoch": 0.33287214962849093, "grad_norm": 0.511711597442627, "learning_rate": 7.968568508950786e-05, "loss": 2.9859, "step": 4060 }, { "epoch": 0.3330361260568793, "grad_norm": 0.46439769864082336, "learning_rate": 7.96638634711055e-05, "loss": 2.9397, "step": 4062 }, { "epoch": 0.33320010248526777, "grad_norm": 0.4752715528011322, "learning_rate": 7.96420331301138e-05, "loss": 2.939, "step": 4064 }, { "epoch": 0.33336407891365616, "grad_norm": 0.4505968689918518, "learning_rate": 7.962019407295194e-05, "loss": 2.8908, "step": 4066 }, { "epoch": 0.3335280553420446, "grad_norm": 0.47633254528045654, "learning_rate": 7.959834630604164e-05, "loss": 2.9235, "step": 4068 }, { "epoch": 0.333692031770433, "grad_norm": 0.460891991853714, "learning_rate": 7.957648983580718e-05, "loss": 2.9573, "step": 4070 }, { "epoch": 0.33385600819882144, "grad_norm": 0.4335838854312897, "learning_rate": 7.955462466867539e-05, "loss": 2.9811, "step": 4072 }, { "epoch": 0.33401998462720983, "grad_norm": 0.4745972752571106, "learning_rate": 7.95327508110757e-05, "loss": 2.9383, "step": 4074 }, { "epoch": 0.3341839610555983, "grad_norm": 0.46762803196907043, "learning_rate": 7.951086826944005e-05, "loss": 2.9354, "step": 4076 }, { "epoch": 0.33434793748398667, "grad_norm": 0.4496844708919525, "learning_rate": 7.948897705020293e-05, "loss": 2.9077, "step": 4078 }, { "epoch": 0.3345119139123751, "grad_norm": 0.46020054817199707, "learning_rate": 7.946707715980143e-05, "loss": 2.9189, "step": 4080 }, { "epoch": 0.3346758903407635, "grad_norm": 0.48112010955810547, "learning_rate": 7.944516860467518e-05, "loss": 2.9503, "step": 4082 }, { "epoch": 0.33483986676915195, "grad_norm": 0.4764317274093628, "learning_rate": 7.94232513912663e-05, "loss": 2.8818, "step": 4084 }, { "epoch": 0.33500384319754034, "grad_norm": 0.5369095802307129, "learning_rate": 7.940132552601949e-05, "loss": 2.9383, "step": 4086 }, { "epoch": 0.3351678196259288, "grad_norm": 0.5077469944953918, "learning_rate": 7.937939101538203e-05, "loss": 2.9377, "step": 4088 }, { "epoch": 0.3353317960543172, "grad_norm": 0.570915937423706, "learning_rate": 7.935744786580371e-05, "loss": 2.9779, "step": 4090 }, { "epoch": 0.3354957724827056, "grad_norm": 0.4736226499080658, "learning_rate": 7.933549608373683e-05, "loss": 2.8925, "step": 4092 }, { "epoch": 0.335659748911094, "grad_norm": 0.4973873496055603, "learning_rate": 7.931353567563631e-05, "loss": 2.9277, "step": 4094 }, { "epoch": 0.33582372533948246, "grad_norm": 0.511052668094635, "learning_rate": 7.92915666479595e-05, "loss": 2.8972, "step": 4096 }, { "epoch": 0.33598770176787085, "grad_norm": 0.49706801772117615, "learning_rate": 7.926958900716637e-05, "loss": 2.9035, "step": 4098 }, { "epoch": 0.3361516781962593, "grad_norm": 0.5151313543319702, "learning_rate": 7.92476027597194e-05, "loss": 2.9174, "step": 4100 }, { "epoch": 0.3363156546246477, "grad_norm": 0.5134227871894836, "learning_rate": 7.922560791208357e-05, "loss": 2.9505, "step": 4102 }, { "epoch": 0.33647963105303613, "grad_norm": 0.4975057542324066, "learning_rate": 7.920360447072645e-05, "loss": 2.9491, "step": 4104 }, { "epoch": 0.3366436074814245, "grad_norm": 0.5015755295753479, "learning_rate": 7.918159244211807e-05, "loss": 2.9242, "step": 4106 }, { "epoch": 0.33680758390981297, "grad_norm": 0.5908811688423157, "learning_rate": 7.915957183273102e-05, "loss": 2.9008, "step": 4108 }, { "epoch": 0.33697156033820136, "grad_norm": 0.5025661587715149, "learning_rate": 7.91375426490404e-05, "loss": 2.9373, "step": 4110 }, { "epoch": 0.3371355367665898, "grad_norm": 0.5269520282745361, "learning_rate": 7.911550489752389e-05, "loss": 2.9817, "step": 4112 }, { "epoch": 0.3372995131949782, "grad_norm": 0.5089029669761658, "learning_rate": 7.909345858466157e-05, "loss": 2.9172, "step": 4114 }, { "epoch": 0.33746348962336664, "grad_norm": 0.49392855167388916, "learning_rate": 7.907140371693616e-05, "loss": 2.8958, "step": 4116 }, { "epoch": 0.3376274660517551, "grad_norm": 0.4965139329433441, "learning_rate": 7.904934030083281e-05, "loss": 2.9154, "step": 4118 }, { "epoch": 0.3377914424801435, "grad_norm": 0.4956895411014557, "learning_rate": 7.902726834283923e-05, "loss": 2.9441, "step": 4120 }, { "epoch": 0.3379554189085319, "grad_norm": 0.46426016092300415, "learning_rate": 7.90051878494456e-05, "loss": 2.9203, "step": 4122 }, { "epoch": 0.3381193953369203, "grad_norm": 0.47019776701927185, "learning_rate": 7.898309882714468e-05, "loss": 2.9324, "step": 4124 }, { "epoch": 0.33828337176530876, "grad_norm": 0.43719297647476196, "learning_rate": 7.896100128243169e-05, "loss": 2.9194, "step": 4126 }, { "epoch": 0.33844734819369715, "grad_norm": 0.4638632535934448, "learning_rate": 7.893889522180432e-05, "loss": 2.9208, "step": 4128 }, { "epoch": 0.3386113246220856, "grad_norm": 0.46020522713661194, "learning_rate": 7.891678065176284e-05, "loss": 2.9386, "step": 4130 }, { "epoch": 0.338775301050474, "grad_norm": 0.5725560784339905, "learning_rate": 7.889465757880999e-05, "loss": 2.8924, "step": 4132 }, { "epoch": 0.33893927747886243, "grad_norm": 0.5009239912033081, "learning_rate": 7.887252600945096e-05, "loss": 2.9583, "step": 4134 }, { "epoch": 0.3391032539072508, "grad_norm": 0.5835885405540466, "learning_rate": 7.885038595019356e-05, "loss": 2.9996, "step": 4136 }, { "epoch": 0.33926723033563927, "grad_norm": 0.5218272805213928, "learning_rate": 7.882823740754796e-05, "loss": 2.9723, "step": 4138 }, { "epoch": 0.33943120676402766, "grad_norm": 0.48495087027549744, "learning_rate": 7.880608038802694e-05, "loss": 2.9011, "step": 4140 }, { "epoch": 0.3395951831924161, "grad_norm": 0.49771589040756226, "learning_rate": 7.878391489814567e-05, "loss": 2.9282, "step": 4142 }, { "epoch": 0.3397591596208045, "grad_norm": 0.49048569798469543, "learning_rate": 7.876174094442189e-05, "loss": 2.9254, "step": 4144 }, { "epoch": 0.33992313604919294, "grad_norm": 0.5105252861976624, "learning_rate": 7.873955853337578e-05, "loss": 2.9522, "step": 4146 }, { "epoch": 0.34008711247758133, "grad_norm": 0.4772528111934662, "learning_rate": 7.871736767153005e-05, "loss": 2.9794, "step": 4148 }, { "epoch": 0.3402510889059698, "grad_norm": 0.4797604978084564, "learning_rate": 7.869516836540985e-05, "loss": 2.9455, "step": 4150 }, { "epoch": 0.34041506533435817, "grad_norm": 0.4566085934638977, "learning_rate": 7.867296062154284e-05, "loss": 2.9481, "step": 4152 }, { "epoch": 0.3405790417627466, "grad_norm": 0.5133824944496155, "learning_rate": 7.865074444645916e-05, "loss": 2.9179, "step": 4154 }, { "epoch": 0.340743018191135, "grad_norm": 0.475188285112381, "learning_rate": 7.862851984669142e-05, "loss": 2.8836, "step": 4156 }, { "epoch": 0.34090699461952345, "grad_norm": 0.4517628848552704, "learning_rate": 7.86062868287747e-05, "loss": 2.8837, "step": 4158 }, { "epoch": 0.34107097104791184, "grad_norm": 0.44536206126213074, "learning_rate": 7.85840453992466e-05, "loss": 2.9381, "step": 4160 }, { "epoch": 0.3412349474763003, "grad_norm": 0.5397975444793701, "learning_rate": 7.856179556464711e-05, "loss": 2.9107, "step": 4162 }, { "epoch": 0.3413989239046887, "grad_norm": 0.4633306562900543, "learning_rate": 7.853953733151877e-05, "loss": 2.9345, "step": 4164 }, { "epoch": 0.3415629003330771, "grad_norm": 0.4207089841365814, "learning_rate": 7.851727070640658e-05, "loss": 2.9393, "step": 4166 }, { "epoch": 0.3417268767614655, "grad_norm": 0.46550941467285156, "learning_rate": 7.849499569585797e-05, "loss": 2.9011, "step": 4168 }, { "epoch": 0.34189085318985396, "grad_norm": 0.4900607764720917, "learning_rate": 7.847271230642283e-05, "loss": 2.9214, "step": 4170 }, { "epoch": 0.34205482961824235, "grad_norm": 0.45382747054100037, "learning_rate": 7.84504205446536e-05, "loss": 2.9706, "step": 4172 }, { "epoch": 0.3422188060466308, "grad_norm": 0.4561139941215515, "learning_rate": 7.842812041710505e-05, "loss": 2.9733, "step": 4174 }, { "epoch": 0.34238278247501924, "grad_norm": 0.438036173582077, "learning_rate": 7.840581193033452e-05, "loss": 2.9325, "step": 4176 }, { "epoch": 0.34254675890340763, "grad_norm": 0.4495597183704376, "learning_rate": 7.838349509090177e-05, "loss": 2.856, "step": 4178 }, { "epoch": 0.3427107353317961, "grad_norm": 0.44860947132110596, "learning_rate": 7.8361169905369e-05, "loss": 2.8807, "step": 4180 }, { "epoch": 0.34287471176018447, "grad_norm": 0.43089696764945984, "learning_rate": 7.833883638030087e-05, "loss": 2.8998, "step": 4182 }, { "epoch": 0.3430386881885729, "grad_norm": 0.42991116642951965, "learning_rate": 7.831649452226453e-05, "loss": 2.925, "step": 4184 }, { "epoch": 0.3432026646169613, "grad_norm": 0.4584693908691406, "learning_rate": 7.829414433782951e-05, "loss": 2.9263, "step": 4186 }, { "epoch": 0.34336664104534975, "grad_norm": 0.43747174739837646, "learning_rate": 7.827178583356786e-05, "loss": 2.9813, "step": 4188 }, { "epoch": 0.34353061747373814, "grad_norm": 0.44275543093681335, "learning_rate": 7.824941901605407e-05, "loss": 2.9485, "step": 4190 }, { "epoch": 0.3436945939021266, "grad_norm": 0.4621785879135132, "learning_rate": 7.822704389186499e-05, "loss": 2.9154, "step": 4192 }, { "epoch": 0.343858570330515, "grad_norm": 0.4807699918746948, "learning_rate": 7.820466046758001e-05, "loss": 2.939, "step": 4194 }, { "epoch": 0.3440225467589034, "grad_norm": 0.5348572134971619, "learning_rate": 7.818226874978092e-05, "loss": 2.9083, "step": 4196 }, { "epoch": 0.3441865231872918, "grad_norm": 0.4638359844684601, "learning_rate": 7.815986874505195e-05, "loss": 2.951, "step": 4198 }, { "epoch": 0.34435049961568026, "grad_norm": 0.49754995107650757, "learning_rate": 7.813746045997974e-05, "loss": 2.9631, "step": 4200 }, { "epoch": 0.34451447604406865, "grad_norm": 0.44452568888664246, "learning_rate": 7.811504390115344e-05, "loss": 2.9216, "step": 4202 }, { "epoch": 0.3446784524724571, "grad_norm": 0.48824217915534973, "learning_rate": 7.809261907516457e-05, "loss": 2.8855, "step": 4204 }, { "epoch": 0.3448424289008455, "grad_norm": 0.4847983717918396, "learning_rate": 7.807018598860709e-05, "loss": 2.9606, "step": 4206 }, { "epoch": 0.34500640532923393, "grad_norm": 0.4555025100708008, "learning_rate": 7.80477446480774e-05, "loss": 2.9227, "step": 4208 }, { "epoch": 0.3451703817576223, "grad_norm": 0.43306758999824524, "learning_rate": 7.802529506017432e-05, "loss": 2.9116, "step": 4210 }, { "epoch": 0.34533435818601077, "grad_norm": 0.47224709391593933, "learning_rate": 7.80028372314991e-05, "loss": 2.9097, "step": 4212 }, { "epoch": 0.34549833461439916, "grad_norm": 0.6299461126327515, "learning_rate": 7.798037116865542e-05, "loss": 2.9453, "step": 4214 }, { "epoch": 0.3456623110427876, "grad_norm": 0.4493595361709595, "learning_rate": 7.795789687824936e-05, "loss": 2.8246, "step": 4216 }, { "epoch": 0.345826287471176, "grad_norm": 0.4576161205768585, "learning_rate": 7.793541436688943e-05, "loss": 2.9754, "step": 4218 }, { "epoch": 0.34599026389956444, "grad_norm": 0.44774124026298523, "learning_rate": 7.791292364118659e-05, "loss": 2.9545, "step": 4220 }, { "epoch": 0.34615424032795283, "grad_norm": 0.48785826563835144, "learning_rate": 7.789042470775414e-05, "loss": 2.9267, "step": 4222 }, { "epoch": 0.3463182167563413, "grad_norm": 0.4675656855106354, "learning_rate": 7.786791757320788e-05, "loss": 2.9094, "step": 4224 }, { "epoch": 0.34648219318472967, "grad_norm": 0.4874213635921478, "learning_rate": 7.784540224416594e-05, "loss": 2.888, "step": 4226 }, { "epoch": 0.3466461696131181, "grad_norm": 0.43593478202819824, "learning_rate": 7.782287872724895e-05, "loss": 2.9491, "step": 4228 }, { "epoch": 0.3468101460415065, "grad_norm": 0.43017590045928955, "learning_rate": 7.780034702907985e-05, "loss": 2.9125, "step": 4230 }, { "epoch": 0.34697412246989495, "grad_norm": 0.41226688027381897, "learning_rate": 7.777780715628406e-05, "loss": 2.8866, "step": 4232 }, { "epoch": 0.3471380988982834, "grad_norm": 0.44163820147514343, "learning_rate": 7.775525911548935e-05, "loss": 2.9314, "step": 4234 }, { "epoch": 0.3473020753266718, "grad_norm": 0.48321813344955444, "learning_rate": 7.773270291332595e-05, "loss": 2.9164, "step": 4236 }, { "epoch": 0.34746605175506023, "grad_norm": 0.46925100684165955, "learning_rate": 7.771013855642646e-05, "loss": 2.9603, "step": 4238 }, { "epoch": 0.3476300281834486, "grad_norm": 0.49697205424308777, "learning_rate": 7.768756605142584e-05, "loss": 2.9384, "step": 4240 }, { "epoch": 0.34779400461183707, "grad_norm": 0.5071016550064087, "learning_rate": 7.76649854049615e-05, "loss": 2.9333, "step": 4242 }, { "epoch": 0.34795798104022546, "grad_norm": 0.49894312024116516, "learning_rate": 7.764239662367324e-05, "loss": 2.9134, "step": 4244 }, { "epoch": 0.3481219574686139, "grad_norm": 0.5091718435287476, "learning_rate": 7.761979971420323e-05, "loss": 2.9094, "step": 4246 }, { "epoch": 0.3482859338970023, "grad_norm": 0.46571823954582214, "learning_rate": 7.759719468319602e-05, "loss": 2.9714, "step": 4248 }, { "epoch": 0.34844991032539074, "grad_norm": 0.47561269998550415, "learning_rate": 7.75745815372986e-05, "loss": 2.9784, "step": 4250 }, { "epoch": 0.34861388675377913, "grad_norm": 0.5359566807746887, "learning_rate": 7.755196028316027e-05, "loss": 2.9486, "step": 4252 }, { "epoch": 0.3487778631821676, "grad_norm": 0.5660485625267029, "learning_rate": 7.752933092743279e-05, "loss": 2.9364, "step": 4254 }, { "epoch": 0.34894183961055597, "grad_norm": 0.4833694100379944, "learning_rate": 7.750669347677027e-05, "loss": 2.9231, "step": 4256 }, { "epoch": 0.3491058160389444, "grad_norm": 0.509084939956665, "learning_rate": 7.748404793782917e-05, "loss": 2.9245, "step": 4258 }, { "epoch": 0.3492697924673328, "grad_norm": 0.49266326427459717, "learning_rate": 7.74613943172684e-05, "loss": 2.9482, "step": 4260 }, { "epoch": 0.34943376889572125, "grad_norm": 0.4660591781139374, "learning_rate": 7.743873262174917e-05, "loss": 2.9291, "step": 4262 }, { "epoch": 0.34959774532410964, "grad_norm": 0.4857702851295471, "learning_rate": 7.74160628579351e-05, "loss": 2.9203, "step": 4264 }, { "epoch": 0.3497617217524981, "grad_norm": 0.4842332899570465, "learning_rate": 7.739338503249219e-05, "loss": 2.948, "step": 4266 }, { "epoch": 0.3499256981808865, "grad_norm": 0.5411705374717712, "learning_rate": 7.737069915208882e-05, "loss": 2.9183, "step": 4268 }, { "epoch": 0.3500896746092749, "grad_norm": 0.5409607291221619, "learning_rate": 7.734800522339566e-05, "loss": 2.8847, "step": 4270 }, { "epoch": 0.3502536510376633, "grad_norm": 0.43880927562713623, "learning_rate": 7.732530325308587e-05, "loss": 2.9398, "step": 4272 }, { "epoch": 0.35041762746605176, "grad_norm": 0.44448137283325195, "learning_rate": 7.730259324783489e-05, "loss": 2.9228, "step": 4274 }, { "epoch": 0.35058160389444015, "grad_norm": 0.5357170104980469, "learning_rate": 7.727987521432054e-05, "loss": 2.9275, "step": 4276 }, { "epoch": 0.3507455803228286, "grad_norm": 0.48424386978149414, "learning_rate": 7.725714915922299e-05, "loss": 2.8905, "step": 4278 }, { "epoch": 0.350909556751217, "grad_norm": 0.4209941625595093, "learning_rate": 7.72344150892248e-05, "loss": 2.8945, "step": 4280 }, { "epoch": 0.35107353317960543, "grad_norm": 0.42999011278152466, "learning_rate": 7.721167301101088e-05, "loss": 2.8828, "step": 4282 }, { "epoch": 0.3512375096079938, "grad_norm": 0.4773045480251312, "learning_rate": 7.718892293126847e-05, "loss": 2.9068, "step": 4284 }, { "epoch": 0.35140148603638227, "grad_norm": 0.45867007970809937, "learning_rate": 7.716616485668718e-05, "loss": 2.9198, "step": 4286 }, { "epoch": 0.35156546246477066, "grad_norm": 0.47990646958351135, "learning_rate": 7.714339879395897e-05, "loss": 2.8771, "step": 4288 }, { "epoch": 0.3517294388931591, "grad_norm": 0.5104199051856995, "learning_rate": 7.712062474977815e-05, "loss": 2.9214, "step": 4290 }, { "epoch": 0.35189341532154755, "grad_norm": 0.5507774353027344, "learning_rate": 7.709784273084137e-05, "loss": 2.9568, "step": 4292 }, { "epoch": 0.35205739174993594, "grad_norm": 0.5063462257385254, "learning_rate": 7.707505274384761e-05, "loss": 2.8618, "step": 4294 }, { "epoch": 0.3522213681783244, "grad_norm": 0.5065426826477051, "learning_rate": 7.705225479549825e-05, "loss": 2.9249, "step": 4296 }, { "epoch": 0.3523853446067128, "grad_norm": 0.47254642844200134, "learning_rate": 7.702944889249694e-05, "loss": 2.9213, "step": 4298 }, { "epoch": 0.3525493210351012, "grad_norm": 0.5116934776306152, "learning_rate": 7.700663504154973e-05, "loss": 2.9124, "step": 4300 }, { "epoch": 0.3527132974634896, "grad_norm": 0.4968010187149048, "learning_rate": 7.698381324936496e-05, "loss": 2.8896, "step": 4302 }, { "epoch": 0.35287727389187806, "grad_norm": 0.5114293694496155, "learning_rate": 7.696098352265334e-05, "loss": 2.9226, "step": 4304 }, { "epoch": 0.35304125032026645, "grad_norm": 0.46992501616477966, "learning_rate": 7.693814586812788e-05, "loss": 2.8989, "step": 4306 }, { "epoch": 0.3532052267486549, "grad_norm": 0.43957480788230896, "learning_rate": 7.691530029250393e-05, "loss": 2.9235, "step": 4308 }, { "epoch": 0.3533692031770433, "grad_norm": 0.4991060793399811, "learning_rate": 7.689244680249922e-05, "loss": 2.9177, "step": 4310 }, { "epoch": 0.35353317960543174, "grad_norm": 0.5018460154533386, "learning_rate": 7.686958540483372e-05, "loss": 2.9189, "step": 4312 }, { "epoch": 0.3536971560338201, "grad_norm": 0.525246798992157, "learning_rate": 7.68467161062298e-05, "loss": 2.893, "step": 4314 }, { "epoch": 0.35386113246220857, "grad_norm": 0.49975743889808655, "learning_rate": 7.682383891341212e-05, "loss": 2.9423, "step": 4316 }, { "epoch": 0.35402510889059696, "grad_norm": 0.4797281324863434, "learning_rate": 7.680095383310764e-05, "loss": 2.925, "step": 4318 }, { "epoch": 0.3541890853189854, "grad_norm": 0.5019105672836304, "learning_rate": 7.677806087204567e-05, "loss": 2.9299, "step": 4320 }, { "epoch": 0.3543530617473738, "grad_norm": 0.4949765205383301, "learning_rate": 7.675516003695787e-05, "loss": 2.9248, "step": 4322 }, { "epoch": 0.35451703817576224, "grad_norm": 0.46523308753967285, "learning_rate": 7.673225133457815e-05, "loss": 2.8888, "step": 4324 }, { "epoch": 0.35468101460415064, "grad_norm": 0.4424777328968048, "learning_rate": 7.670933477164275e-05, "loss": 2.9019, "step": 4326 }, { "epoch": 0.3548449910325391, "grad_norm": 0.43407967686653137, "learning_rate": 7.668641035489024e-05, "loss": 2.8801, "step": 4328 }, { "epoch": 0.35500896746092747, "grad_norm": 0.48617562651634216, "learning_rate": 7.666347809106149e-05, "loss": 2.9603, "step": 4330 }, { "epoch": 0.3551729438893159, "grad_norm": 0.441741406917572, "learning_rate": 7.664053798689968e-05, "loss": 2.8982, "step": 4332 }, { "epoch": 0.3553369203177043, "grad_norm": 0.4365949034690857, "learning_rate": 7.66175900491503e-05, "loss": 2.9343, "step": 4334 }, { "epoch": 0.35550089674609275, "grad_norm": 0.47714248299598694, "learning_rate": 7.659463428456113e-05, "loss": 2.9008, "step": 4336 }, { "epoch": 0.35566487317448114, "grad_norm": 0.4637061357498169, "learning_rate": 7.657167069988225e-05, "loss": 2.8479, "step": 4338 }, { "epoch": 0.3558288496028696, "grad_norm": 0.48176297545433044, "learning_rate": 7.654869930186607e-05, "loss": 2.8835, "step": 4340 }, { "epoch": 0.355992826031258, "grad_norm": 0.4734376072883606, "learning_rate": 7.652572009726726e-05, "loss": 2.9718, "step": 4342 }, { "epoch": 0.3561568024596464, "grad_norm": 0.4756949245929718, "learning_rate": 7.650273309284281e-05, "loss": 2.8875, "step": 4344 }, { "epoch": 0.3563207788880349, "grad_norm": 0.4926952123641968, "learning_rate": 7.647973829535199e-05, "loss": 2.9093, "step": 4346 }, { "epoch": 0.35648475531642326, "grad_norm": 0.4380865693092346, "learning_rate": 7.645673571155636e-05, "loss": 2.9251, "step": 4348 }, { "epoch": 0.3566487317448117, "grad_norm": 0.45024943351745605, "learning_rate": 7.643372534821978e-05, "loss": 2.9649, "step": 4350 }, { "epoch": 0.3568127081732001, "grad_norm": 0.4537756145000458, "learning_rate": 7.64107072121084e-05, "loss": 2.9046, "step": 4352 }, { "epoch": 0.35697668460158855, "grad_norm": 0.4084469676017761, "learning_rate": 7.638768130999063e-05, "loss": 2.8553, "step": 4354 }, { "epoch": 0.35714066102997694, "grad_norm": 0.4993169605731964, "learning_rate": 7.636464764863721e-05, "loss": 2.8792, "step": 4356 }, { "epoch": 0.3573046374583654, "grad_norm": 0.4630793333053589, "learning_rate": 7.634160623482111e-05, "loss": 2.8703, "step": 4358 }, { "epoch": 0.35746861388675377, "grad_norm": 0.43812108039855957, "learning_rate": 7.631855707531762e-05, "loss": 2.8906, "step": 4360 }, { "epoch": 0.3576325903151422, "grad_norm": 0.45748457312583923, "learning_rate": 7.629550017690428e-05, "loss": 2.916, "step": 4362 }, { "epoch": 0.3577965667435306, "grad_norm": 0.4260389804840088, "learning_rate": 7.627243554636092e-05, "loss": 2.9324, "step": 4364 }, { "epoch": 0.35796054317191905, "grad_norm": 0.4282701313495636, "learning_rate": 7.624936319046965e-05, "loss": 2.9126, "step": 4366 }, { "epoch": 0.35812451960030744, "grad_norm": 0.4490812420845032, "learning_rate": 7.622628311601482e-05, "loss": 2.9562, "step": 4368 }, { "epoch": 0.3582884960286959, "grad_norm": 0.47239381074905396, "learning_rate": 7.620319532978311e-05, "loss": 2.8903, "step": 4370 }, { "epoch": 0.3584524724570843, "grad_norm": 0.44792598485946655, "learning_rate": 7.618009983856338e-05, "loss": 2.9105, "step": 4372 }, { "epoch": 0.3586164488854727, "grad_norm": 0.44292473793029785, "learning_rate": 7.615699664914685e-05, "loss": 2.8907, "step": 4374 }, { "epoch": 0.3587804253138611, "grad_norm": 0.4768601059913635, "learning_rate": 7.613388576832692e-05, "loss": 2.8963, "step": 4376 }, { "epoch": 0.35894440174224956, "grad_norm": 0.45533299446105957, "learning_rate": 7.61107672028993e-05, "loss": 2.8683, "step": 4378 }, { "epoch": 0.35910837817063795, "grad_norm": 0.48470351099967957, "learning_rate": 7.608764095966197e-05, "loss": 2.9618, "step": 4380 }, { "epoch": 0.3592723545990264, "grad_norm": 0.49000081419944763, "learning_rate": 7.606450704541514e-05, "loss": 2.9211, "step": 4382 }, { "epoch": 0.3594363310274148, "grad_norm": 0.536358654499054, "learning_rate": 7.604136546696127e-05, "loss": 2.9231, "step": 4384 }, { "epoch": 0.35960030745580324, "grad_norm": 0.509800136089325, "learning_rate": 7.60182162311051e-05, "loss": 2.8607, "step": 4386 }, { "epoch": 0.3597642838841916, "grad_norm": 0.47165894508361816, "learning_rate": 7.59950593446536e-05, "loss": 2.9478, "step": 4388 }, { "epoch": 0.3599282603125801, "grad_norm": 0.48365214467048645, "learning_rate": 7.5971894814416e-05, "loss": 2.8759, "step": 4390 }, { "epoch": 0.36009223674096846, "grad_norm": 0.507278323173523, "learning_rate": 7.594872264720378e-05, "loss": 2.9494, "step": 4392 }, { "epoch": 0.3602562131693569, "grad_norm": 0.5131365060806274, "learning_rate": 7.592554284983067e-05, "loss": 2.8763, "step": 4394 }, { "epoch": 0.3604201895977453, "grad_norm": 0.44358497858047485, "learning_rate": 7.590235542911262e-05, "loss": 2.8689, "step": 4396 }, { "epoch": 0.36058416602613375, "grad_norm": 0.45631396770477295, "learning_rate": 7.587916039186782e-05, "loss": 2.9157, "step": 4398 }, { "epoch": 0.36074814245452214, "grad_norm": 0.4669037461280823, "learning_rate": 7.585595774491675e-05, "loss": 2.888, "step": 4400 }, { "epoch": 0.3609121188829106, "grad_norm": 0.479308158159256, "learning_rate": 7.58327474950821e-05, "loss": 2.9263, "step": 4402 }, { "epoch": 0.36107609531129903, "grad_norm": 0.44483739137649536, "learning_rate": 7.580952964918873e-05, "loss": 2.9215, "step": 4404 }, { "epoch": 0.3612400717396874, "grad_norm": 0.47619539499282837, "learning_rate": 7.578630421406385e-05, "loss": 2.9243, "step": 4406 }, { "epoch": 0.36140404816807586, "grad_norm": 0.45321038365364075, "learning_rate": 7.576307119653682e-05, "loss": 2.9044, "step": 4408 }, { "epoch": 0.36156802459646425, "grad_norm": 0.48409467935562134, "learning_rate": 7.573983060343927e-05, "loss": 2.9526, "step": 4410 }, { "epoch": 0.3617320010248527, "grad_norm": 0.474016934633255, "learning_rate": 7.571658244160504e-05, "loss": 2.8757, "step": 4412 }, { "epoch": 0.3618959774532411, "grad_norm": 0.4701443016529083, "learning_rate": 7.569332671787019e-05, "loss": 2.897, "step": 4414 }, { "epoch": 0.36205995388162954, "grad_norm": 0.4822806119918823, "learning_rate": 7.567006343907302e-05, "loss": 2.8578, "step": 4416 }, { "epoch": 0.3622239303100179, "grad_norm": 0.47614964842796326, "learning_rate": 7.564679261205401e-05, "loss": 2.9121, "step": 4418 }, { "epoch": 0.3623879067384064, "grad_norm": 0.42333364486694336, "learning_rate": 7.562351424365592e-05, "loss": 2.8316, "step": 4420 }, { "epoch": 0.36255188316679476, "grad_norm": 0.39652881026268005, "learning_rate": 7.56002283407237e-05, "loss": 2.8848, "step": 4422 }, { "epoch": 0.3627158595951832, "grad_norm": 0.44348645210266113, "learning_rate": 7.55769349101045e-05, "loss": 2.8826, "step": 4424 }, { "epoch": 0.3628798360235716, "grad_norm": 0.44164928793907166, "learning_rate": 7.555363395864773e-05, "loss": 2.9068, "step": 4426 }, { "epoch": 0.36304381245196005, "grad_norm": 0.4629053771495819, "learning_rate": 7.553032549320494e-05, "loss": 2.9264, "step": 4428 }, { "epoch": 0.36320778888034844, "grad_norm": 0.49411946535110474, "learning_rate": 7.550700952062995e-05, "loss": 2.9582, "step": 4430 }, { "epoch": 0.3633717653087369, "grad_norm": 0.5183814167976379, "learning_rate": 7.548368604777878e-05, "loss": 2.9351, "step": 4432 }, { "epoch": 0.3635357417371253, "grad_norm": 0.4873194694519043, "learning_rate": 7.546035508150962e-05, "loss": 2.9183, "step": 4434 }, { "epoch": 0.3636997181655137, "grad_norm": 0.44624242186546326, "learning_rate": 7.543701662868288e-05, "loss": 2.9125, "step": 4436 }, { "epoch": 0.3638636945939021, "grad_norm": 0.45851930975914, "learning_rate": 7.541367069616121e-05, "loss": 2.8795, "step": 4438 }, { "epoch": 0.36402767102229056, "grad_norm": 0.4716365933418274, "learning_rate": 7.539031729080941e-05, "loss": 2.9401, "step": 4440 }, { "epoch": 0.36419164745067895, "grad_norm": 0.5059689283370972, "learning_rate": 7.536695641949447e-05, "loss": 2.9172, "step": 4442 }, { "epoch": 0.3643556238790674, "grad_norm": 0.5059446692466736, "learning_rate": 7.534358808908564e-05, "loss": 2.962, "step": 4444 }, { "epoch": 0.3645196003074558, "grad_norm": 0.4895912706851959, "learning_rate": 7.53202123064543e-05, "loss": 2.948, "step": 4446 }, { "epoch": 0.36468357673584423, "grad_norm": 0.4798704981803894, "learning_rate": 7.529682907847402e-05, "loss": 2.9029, "step": 4448 }, { "epoch": 0.3648475531642326, "grad_norm": 0.5090065002441406, "learning_rate": 7.527343841202064e-05, "loss": 2.9106, "step": 4450 }, { "epoch": 0.36501152959262106, "grad_norm": 0.49287864565849304, "learning_rate": 7.525004031397209e-05, "loss": 2.9063, "step": 4452 }, { "epoch": 0.36517550602100946, "grad_norm": 0.4472784101963043, "learning_rate": 7.522663479120854e-05, "loss": 2.9396, "step": 4454 }, { "epoch": 0.3653394824493979, "grad_norm": 0.45480141043663025, "learning_rate": 7.520322185061232e-05, "loss": 2.8923, "step": 4456 }, { "epoch": 0.3655034588777863, "grad_norm": 0.4318976402282715, "learning_rate": 7.517980149906795e-05, "loss": 2.9187, "step": 4458 }, { "epoch": 0.36566743530617474, "grad_norm": 0.42421650886535645, "learning_rate": 7.515637374346216e-05, "loss": 2.8494, "step": 4460 }, { "epoch": 0.3658314117345632, "grad_norm": 0.4657871127128601, "learning_rate": 7.513293859068378e-05, "loss": 2.8256, "step": 4462 }, { "epoch": 0.3659953881629516, "grad_norm": 0.4119051992893219, "learning_rate": 7.510949604762389e-05, "loss": 2.8893, "step": 4464 }, { "epoch": 0.36615936459134, "grad_norm": 0.44624143838882446, "learning_rate": 7.508604612117572e-05, "loss": 2.9587, "step": 4466 }, { "epoch": 0.3663233410197284, "grad_norm": 0.43493539094924927, "learning_rate": 7.506258881823463e-05, "loss": 2.907, "step": 4468 }, { "epoch": 0.36648731744811686, "grad_norm": 0.42813992500305176, "learning_rate": 7.503912414569821e-05, "loss": 2.8328, "step": 4470 }, { "epoch": 0.36665129387650525, "grad_norm": 0.4879220724105835, "learning_rate": 7.50156521104662e-05, "loss": 2.9389, "step": 4472 }, { "epoch": 0.3668152703048937, "grad_norm": 0.49016737937927246, "learning_rate": 7.499217271944049e-05, "loss": 2.9138, "step": 4474 }, { "epoch": 0.3669792467332821, "grad_norm": 0.4534437358379364, "learning_rate": 7.496868597952513e-05, "loss": 2.8611, "step": 4476 }, { "epoch": 0.36714322316167053, "grad_norm": 0.5044350624084473, "learning_rate": 7.494519189762634e-05, "loss": 2.8228, "step": 4478 }, { "epoch": 0.3673071995900589, "grad_norm": 0.44165295362472534, "learning_rate": 7.492169048065252e-05, "loss": 2.9049, "step": 4480 }, { "epoch": 0.36747117601844737, "grad_norm": 0.456106036901474, "learning_rate": 7.489818173551418e-05, "loss": 2.8331, "step": 4482 }, { "epoch": 0.36763515244683576, "grad_norm": 0.4963456392288208, "learning_rate": 7.487466566912405e-05, "loss": 2.965, "step": 4484 }, { "epoch": 0.3677991288752242, "grad_norm": 0.5101214647293091, "learning_rate": 7.485114228839693e-05, "loss": 2.9389, "step": 4486 }, { "epoch": 0.3679631053036126, "grad_norm": 0.5219040513038635, "learning_rate": 7.482761160024982e-05, "loss": 2.9613, "step": 4488 }, { "epoch": 0.36812708173200104, "grad_norm": 0.46619266271591187, "learning_rate": 7.480407361160189e-05, "loss": 2.9146, "step": 4490 }, { "epoch": 0.36829105816038943, "grad_norm": 0.44373613595962524, "learning_rate": 7.478052832937442e-05, "loss": 2.865, "step": 4492 }, { "epoch": 0.3684550345887779, "grad_norm": 0.450777530670166, "learning_rate": 7.475697576049083e-05, "loss": 2.8657, "step": 4494 }, { "epoch": 0.36861901101716626, "grad_norm": 0.5079871416091919, "learning_rate": 7.473341591187672e-05, "loss": 2.9215, "step": 4496 }, { "epoch": 0.3687829874455547, "grad_norm": 0.44126954674720764, "learning_rate": 7.47098487904598e-05, "loss": 2.9016, "step": 4498 }, { "epoch": 0.3689469638739431, "grad_norm": 0.4707218110561371, "learning_rate": 7.468627440316991e-05, "loss": 2.8649, "step": 4500 }, { "epoch": 0.36911094030233155, "grad_norm": 0.4811074137687683, "learning_rate": 7.466269275693906e-05, "loss": 2.8832, "step": 4502 }, { "epoch": 0.36927491673071994, "grad_norm": 0.5184465050697327, "learning_rate": 7.46391038587014e-05, "loss": 2.8904, "step": 4504 }, { "epoch": 0.3694388931591084, "grad_norm": 0.5476818680763245, "learning_rate": 7.461550771539314e-05, "loss": 2.8888, "step": 4506 }, { "epoch": 0.3696028695874968, "grad_norm": 0.4976902902126312, "learning_rate": 7.459190433395271e-05, "loss": 2.892, "step": 4508 }, { "epoch": 0.3697668460158852, "grad_norm": 0.48264408111572266, "learning_rate": 7.456829372132062e-05, "loss": 2.8924, "step": 4510 }, { "epoch": 0.3699308224442736, "grad_norm": 0.4939655065536499, "learning_rate": 7.454467588443949e-05, "loss": 2.8877, "step": 4512 }, { "epoch": 0.37009479887266206, "grad_norm": 0.5026797652244568, "learning_rate": 7.452105083025411e-05, "loss": 2.9223, "step": 4514 }, { "epoch": 0.37025877530105045, "grad_norm": 0.48400169610977173, "learning_rate": 7.449741856571138e-05, "loss": 2.9129, "step": 4516 }, { "epoch": 0.3704227517294389, "grad_norm": 0.49504354596138, "learning_rate": 7.44737790977603e-05, "loss": 2.9228, "step": 4518 }, { "epoch": 0.37058672815782734, "grad_norm": 0.4225211441516876, "learning_rate": 7.4450132433352e-05, "loss": 2.9272, "step": 4520 }, { "epoch": 0.37075070458621573, "grad_norm": 0.4583554267883301, "learning_rate": 7.442647857943973e-05, "loss": 2.9052, "step": 4522 }, { "epoch": 0.3709146810146042, "grad_norm": 0.46609818935394287, "learning_rate": 7.440281754297884e-05, "loss": 2.8505, "step": 4524 }, { "epoch": 0.37107865744299257, "grad_norm": 0.4706270396709442, "learning_rate": 7.437914933092683e-05, "loss": 2.8684, "step": 4526 }, { "epoch": 0.371242633871381, "grad_norm": 0.4370834231376648, "learning_rate": 7.435547395024324e-05, "loss": 2.9258, "step": 4528 }, { "epoch": 0.3714066102997694, "grad_norm": 0.43355029821395874, "learning_rate": 7.43317914078898e-05, "loss": 2.8883, "step": 4530 }, { "epoch": 0.37157058672815785, "grad_norm": 0.45329612493515015, "learning_rate": 7.430810171083028e-05, "loss": 2.9181, "step": 4532 }, { "epoch": 0.37173456315654624, "grad_norm": 0.4789738357067108, "learning_rate": 7.42844048660306e-05, "loss": 2.9491, "step": 4534 }, { "epoch": 0.3718985395849347, "grad_norm": 0.49160686135292053, "learning_rate": 7.426070088045873e-05, "loss": 2.9147, "step": 4536 }, { "epoch": 0.3720625160133231, "grad_norm": 0.4814167320728302, "learning_rate": 7.42369897610848e-05, "loss": 2.8687, "step": 4538 }, { "epoch": 0.3722264924417115, "grad_norm": 0.4552319645881653, "learning_rate": 7.421327151488102e-05, "loss": 2.9497, "step": 4540 }, { "epoch": 0.3723904688700999, "grad_norm": 0.4466070234775543, "learning_rate": 7.418954614882165e-05, "loss": 2.8809, "step": 4542 }, { "epoch": 0.37255444529848836, "grad_norm": 0.44367876648902893, "learning_rate": 7.416581366988309e-05, "loss": 2.8521, "step": 4544 }, { "epoch": 0.37271842172687675, "grad_norm": 0.514403223991394, "learning_rate": 7.414207408504383e-05, "loss": 2.8818, "step": 4546 }, { "epoch": 0.3728823981552652, "grad_norm": 0.45354655385017395, "learning_rate": 7.411832740128441e-05, "loss": 2.8834, "step": 4548 }, { "epoch": 0.3730463745836536, "grad_norm": 0.5105783939361572, "learning_rate": 7.409457362558753e-05, "loss": 2.8796, "step": 4550 }, { "epoch": 0.37321035101204203, "grad_norm": 0.5413146018981934, "learning_rate": 7.40708127649379e-05, "loss": 2.8757, "step": 4552 }, { "epoch": 0.3733743274404304, "grad_norm": 0.4836574196815491, "learning_rate": 7.404704482632236e-05, "loss": 2.9652, "step": 4554 }, { "epoch": 0.37353830386881887, "grad_norm": 0.4397220015525818, "learning_rate": 7.402326981672982e-05, "loss": 2.8942, "step": 4556 }, { "epoch": 0.37370228029720726, "grad_norm": 0.4230784773826599, "learning_rate": 7.399948774315125e-05, "loss": 2.909, "step": 4558 }, { "epoch": 0.3738662567255957, "grad_norm": 0.4401587247848511, "learning_rate": 7.397569861257973e-05, "loss": 2.8884, "step": 4560 }, { "epoch": 0.3740302331539841, "grad_norm": 0.4340316653251648, "learning_rate": 7.395190243201037e-05, "loss": 2.8603, "step": 4562 }, { "epoch": 0.37419420958237254, "grad_norm": 0.5143851637840271, "learning_rate": 7.39280992084404e-05, "loss": 2.9539, "step": 4564 }, { "epoch": 0.37435818601076093, "grad_norm": 0.4426273703575134, "learning_rate": 7.390428894886912e-05, "loss": 2.8304, "step": 4566 }, { "epoch": 0.3745221624391494, "grad_norm": 0.4303826093673706, "learning_rate": 7.388047166029783e-05, "loss": 2.9216, "step": 4568 }, { "epoch": 0.37468613886753777, "grad_norm": 0.4478808641433716, "learning_rate": 7.385664734973e-05, "loss": 2.9159, "step": 4570 }, { "epoch": 0.3748501152959262, "grad_norm": 0.46657073497772217, "learning_rate": 7.383281602417111e-05, "loss": 2.8904, "step": 4572 }, { "epoch": 0.3750140917243146, "grad_norm": 0.48242586851119995, "learning_rate": 7.380897769062866e-05, "loss": 2.9341, "step": 4574 }, { "epoch": 0.37517806815270305, "grad_norm": 0.4930760860443115, "learning_rate": 7.37851323561123e-05, "loss": 2.9213, "step": 4576 }, { "epoch": 0.3753420445810915, "grad_norm": 0.4156397581100464, "learning_rate": 7.376128002763368e-05, "loss": 2.9469, "step": 4578 }, { "epoch": 0.3755060210094799, "grad_norm": 0.4514547884464264, "learning_rate": 7.37374207122065e-05, "loss": 2.8257, "step": 4580 }, { "epoch": 0.37566999743786833, "grad_norm": 0.5136944055557251, "learning_rate": 7.371355441684657e-05, "loss": 2.861, "step": 4582 }, { "epoch": 0.3758339738662567, "grad_norm": 0.45226147770881653, "learning_rate": 7.368968114857172e-05, "loss": 2.9101, "step": 4584 }, { "epoch": 0.37599795029464517, "grad_norm": 0.4473552405834198, "learning_rate": 7.366580091440177e-05, "loss": 2.8764, "step": 4586 }, { "epoch": 0.37616192672303356, "grad_norm": 0.450043648481369, "learning_rate": 7.364191372135872e-05, "loss": 2.8991, "step": 4588 }, { "epoch": 0.376325903151422, "grad_norm": 0.46261969208717346, "learning_rate": 7.361801957646649e-05, "loss": 2.8812, "step": 4590 }, { "epoch": 0.3764898795798104, "grad_norm": 0.4491453468799591, "learning_rate": 7.359411848675113e-05, "loss": 2.8773, "step": 4592 }, { "epoch": 0.37665385600819884, "grad_norm": 0.4237003028392792, "learning_rate": 7.357021045924068e-05, "loss": 2.8133, "step": 4594 }, { "epoch": 0.37681783243658723, "grad_norm": 0.4374450445175171, "learning_rate": 7.354629550096525e-05, "loss": 2.9094, "step": 4596 }, { "epoch": 0.3769818088649757, "grad_norm": 0.43622317910194397, "learning_rate": 7.352237361895699e-05, "loss": 2.8667, "step": 4598 }, { "epoch": 0.37714578529336407, "grad_norm": 0.5011947751045227, "learning_rate": 7.349844482025003e-05, "loss": 2.938, "step": 4600 }, { "epoch": 0.3773097617217525, "grad_norm": 0.44445064663887024, "learning_rate": 7.347450911188063e-05, "loss": 2.9117, "step": 4602 }, { "epoch": 0.3774737381501409, "grad_norm": 0.462815523147583, "learning_rate": 7.345056650088698e-05, "loss": 2.8672, "step": 4604 }, { "epoch": 0.37763771457852935, "grad_norm": 0.4615892767906189, "learning_rate": 7.342661699430939e-05, "loss": 2.8671, "step": 4606 }, { "epoch": 0.37780169100691774, "grad_norm": 0.44637149572372437, "learning_rate": 7.340266059919014e-05, "loss": 2.895, "step": 4608 }, { "epoch": 0.3779656674353062, "grad_norm": 0.45245638489723206, "learning_rate": 7.337869732257352e-05, "loss": 2.9269, "step": 4610 }, { "epoch": 0.3781296438636946, "grad_norm": 0.5181540846824646, "learning_rate": 7.335472717150593e-05, "loss": 2.9433, "step": 4612 }, { "epoch": 0.378293620292083, "grad_norm": 0.46372494101524353, "learning_rate": 7.33307501530357e-05, "loss": 2.881, "step": 4614 }, { "epoch": 0.3784575967204714, "grad_norm": 0.4642076790332794, "learning_rate": 7.330676627421322e-05, "loss": 2.8851, "step": 4616 }, { "epoch": 0.37862157314885986, "grad_norm": 0.3950099050998688, "learning_rate": 7.328277554209094e-05, "loss": 2.804, "step": 4618 }, { "epoch": 0.37878554957724825, "grad_norm": 0.4654783010482788, "learning_rate": 7.32587779637232e-05, "loss": 2.8696, "step": 4620 }, { "epoch": 0.3789495260056367, "grad_norm": 0.4221939742565155, "learning_rate": 7.323477354616648e-05, "loss": 2.8465, "step": 4622 }, { "epoch": 0.3791135024340251, "grad_norm": 0.42203181982040405, "learning_rate": 7.321076229647921e-05, "loss": 2.8943, "step": 4624 }, { "epoch": 0.37927747886241353, "grad_norm": 0.43732768297195435, "learning_rate": 7.318674422172185e-05, "loss": 2.9149, "step": 4626 }, { "epoch": 0.3794414552908019, "grad_norm": 0.47572824358940125, "learning_rate": 7.316271932895685e-05, "loss": 2.9373, "step": 4628 }, { "epoch": 0.37960543171919037, "grad_norm": 0.4992130994796753, "learning_rate": 7.313868762524867e-05, "loss": 2.8813, "step": 4630 }, { "epoch": 0.3797694081475788, "grad_norm": 0.42080262303352356, "learning_rate": 7.311464911766379e-05, "loss": 2.8948, "step": 4632 }, { "epoch": 0.3799333845759672, "grad_norm": 0.47596582770347595, "learning_rate": 7.309060381327066e-05, "loss": 2.9189, "step": 4634 }, { "epoch": 0.38009736100435565, "grad_norm": 0.46686580777168274, "learning_rate": 7.306655171913976e-05, "loss": 2.8552, "step": 4636 }, { "epoch": 0.38026133743274404, "grad_norm": 0.5205831527709961, "learning_rate": 7.304249284234354e-05, "loss": 2.8598, "step": 4638 }, { "epoch": 0.3804253138611325, "grad_norm": 0.47803831100463867, "learning_rate": 7.301842718995646e-05, "loss": 2.8732, "step": 4640 }, { "epoch": 0.3805892902895209, "grad_norm": 0.5350506901741028, "learning_rate": 7.299435476905498e-05, "loss": 2.9056, "step": 4642 }, { "epoch": 0.3807532667179093, "grad_norm": 0.5091723203659058, "learning_rate": 7.297027558671752e-05, "loss": 2.8303, "step": 4644 }, { "epoch": 0.3809172431462977, "grad_norm": 0.4987587332725525, "learning_rate": 7.294618965002451e-05, "loss": 2.8797, "step": 4646 }, { "epoch": 0.38108121957468616, "grad_norm": 0.4535655677318573, "learning_rate": 7.29220969660584e-05, "loss": 2.8964, "step": 4648 }, { "epoch": 0.38124519600307455, "grad_norm": 0.501015841960907, "learning_rate": 7.289799754190354e-05, "loss": 2.8858, "step": 4650 }, { "epoch": 0.381409172431463, "grad_norm": 0.46643322706222534, "learning_rate": 7.28738913846463e-05, "loss": 2.9152, "step": 4652 }, { "epoch": 0.3815731488598514, "grad_norm": 0.5290485620498657, "learning_rate": 7.284977850137509e-05, "loss": 2.911, "step": 4654 }, { "epoch": 0.38173712528823983, "grad_norm": 0.44401952624320984, "learning_rate": 7.282565889918022e-05, "loss": 2.8881, "step": 4656 }, { "epoch": 0.3819011017166282, "grad_norm": 0.4408971071243286, "learning_rate": 7.280153258515398e-05, "loss": 2.8352, "step": 4658 }, { "epoch": 0.38206507814501667, "grad_norm": 0.46017321944236755, "learning_rate": 7.277739956639071e-05, "loss": 2.848, "step": 4660 }, { "epoch": 0.38222905457340506, "grad_norm": 0.46184903383255005, "learning_rate": 7.275325984998662e-05, "loss": 2.8877, "step": 4662 }, { "epoch": 0.3823930310017935, "grad_norm": 0.4524565041065216, "learning_rate": 7.272911344303994e-05, "loss": 2.8983, "step": 4664 }, { "epoch": 0.3825570074301819, "grad_norm": 0.4392745792865753, "learning_rate": 7.27049603526509e-05, "loss": 2.819, "step": 4666 }, { "epoch": 0.38272098385857034, "grad_norm": 0.4415312111377716, "learning_rate": 7.268080058592163e-05, "loss": 2.894, "step": 4668 }, { "epoch": 0.38288496028695873, "grad_norm": 0.4408855140209198, "learning_rate": 7.265663414995626e-05, "loss": 2.8457, "step": 4670 }, { "epoch": 0.3830489367153472, "grad_norm": 0.539198100566864, "learning_rate": 7.263246105186088e-05, "loss": 2.9059, "step": 4672 }, { "epoch": 0.38321291314373557, "grad_norm": 0.4784288704395294, "learning_rate": 7.26082812987435e-05, "loss": 2.83, "step": 4674 }, { "epoch": 0.383376889572124, "grad_norm": 0.49614304304122925, "learning_rate": 7.258409489771417e-05, "loss": 2.8974, "step": 4676 }, { "epoch": 0.3835408660005124, "grad_norm": 0.4700537323951721, "learning_rate": 7.255990185588482e-05, "loss": 2.8507, "step": 4678 }, { "epoch": 0.38370484242890085, "grad_norm": 0.4723677337169647, "learning_rate": 7.253570218036935e-05, "loss": 2.8582, "step": 4680 }, { "epoch": 0.38386881885728924, "grad_norm": 0.4178030490875244, "learning_rate": 7.251149587828362e-05, "loss": 2.9057, "step": 4682 }, { "epoch": 0.3840327952856777, "grad_norm": 0.4648958146572113, "learning_rate": 7.248728295674545e-05, "loss": 2.8949, "step": 4684 }, { "epoch": 0.3841967717140661, "grad_norm": 0.442658394575119, "learning_rate": 7.246306342287456e-05, "loss": 2.8352, "step": 4686 }, { "epoch": 0.3843607481424545, "grad_norm": 0.48625364899635315, "learning_rate": 7.24388372837927e-05, "loss": 2.8022, "step": 4688 }, { "epoch": 0.38452472457084297, "grad_norm": 0.4710596203804016, "learning_rate": 7.241460454662347e-05, "loss": 2.9089, "step": 4690 }, { "epoch": 0.38468870099923136, "grad_norm": 0.4838905334472656, "learning_rate": 7.239036521849246e-05, "loss": 2.882, "step": 4692 }, { "epoch": 0.3848526774276198, "grad_norm": 0.4697989821434021, "learning_rate": 7.236611930652719e-05, "loss": 2.8626, "step": 4694 }, { "epoch": 0.3850166538560082, "grad_norm": 0.4877309203147888, "learning_rate": 7.23418668178571e-05, "loss": 2.889, "step": 4696 }, { "epoch": 0.38518063028439664, "grad_norm": 0.46774300932884216, "learning_rate": 7.231760775961358e-05, "loss": 2.8607, "step": 4698 }, { "epoch": 0.38534460671278503, "grad_norm": 0.45244014263153076, "learning_rate": 7.229334213892996e-05, "loss": 2.9335, "step": 4700 }, { "epoch": 0.3855085831411735, "grad_norm": 0.5148520469665527, "learning_rate": 7.226906996294151e-05, "loss": 2.8708, "step": 4702 }, { "epoch": 0.38567255956956187, "grad_norm": 0.4698105752468109, "learning_rate": 7.224479123878536e-05, "loss": 2.8489, "step": 4704 }, { "epoch": 0.3858365359979503, "grad_norm": 0.4400726556777954, "learning_rate": 7.222050597360063e-05, "loss": 2.8571, "step": 4706 }, { "epoch": 0.3860005124263387, "grad_norm": 0.532037615776062, "learning_rate": 7.219621417452836e-05, "loss": 2.933, "step": 4708 }, { "epoch": 0.38616448885472715, "grad_norm": 0.5109682679176331, "learning_rate": 7.217191584871147e-05, "loss": 2.9106, "step": 4710 }, { "epoch": 0.38632846528311554, "grad_norm": 0.46120208501815796, "learning_rate": 7.214761100329485e-05, "loss": 2.827, "step": 4712 }, { "epoch": 0.386492441711504, "grad_norm": 0.4793972671031952, "learning_rate": 7.212329964542528e-05, "loss": 2.8791, "step": 4714 }, { "epoch": 0.3866564181398924, "grad_norm": 0.48017799854278564, "learning_rate": 7.209898178225144e-05, "loss": 2.9621, "step": 4716 }, { "epoch": 0.3868203945682808, "grad_norm": 0.484854131937027, "learning_rate": 7.207465742092395e-05, "loss": 2.8686, "step": 4718 }, { "epoch": 0.3869843709966692, "grad_norm": 0.46027281880378723, "learning_rate": 7.205032656859534e-05, "loss": 2.9059, "step": 4720 }, { "epoch": 0.38714834742505766, "grad_norm": 0.46675822138786316, "learning_rate": 7.202598923242003e-05, "loss": 2.8374, "step": 4722 }, { "epoch": 0.38731232385344605, "grad_norm": 0.5142098069190979, "learning_rate": 7.200164541955435e-05, "loss": 2.8344, "step": 4724 }, { "epoch": 0.3874763002818345, "grad_norm": 0.474359929561615, "learning_rate": 7.197729513715657e-05, "loss": 2.8859, "step": 4726 }, { "epoch": 0.3876402767102229, "grad_norm": 0.4211660623550415, "learning_rate": 7.195293839238679e-05, "loss": 2.8561, "step": 4728 }, { "epoch": 0.38780425313861133, "grad_norm": 0.4446225166320801, "learning_rate": 7.192857519240707e-05, "loss": 2.8293, "step": 4730 }, { "epoch": 0.3879682295669997, "grad_norm": 0.46592506766319275, "learning_rate": 7.19042055443814e-05, "loss": 2.9295, "step": 4732 }, { "epoch": 0.38813220599538817, "grad_norm": 0.4611138701438904, "learning_rate": 7.187982945547553e-05, "loss": 2.8819, "step": 4734 }, { "epoch": 0.38829618242377656, "grad_norm": 0.4245645999908447, "learning_rate": 7.185544693285727e-05, "loss": 2.8137, "step": 4736 }, { "epoch": 0.388460158852165, "grad_norm": 0.41161197423934937, "learning_rate": 7.18310579836962e-05, "loss": 2.8618, "step": 4738 }, { "epoch": 0.3886241352805534, "grad_norm": 0.49383336305618286, "learning_rate": 7.180666261516384e-05, "loss": 2.8781, "step": 4740 }, { "epoch": 0.38878811170894184, "grad_norm": 0.48720452189445496, "learning_rate": 7.17822608344336e-05, "loss": 2.8798, "step": 4742 }, { "epoch": 0.38895208813733023, "grad_norm": 0.45469310879707336, "learning_rate": 7.175785264868077e-05, "loss": 2.9009, "step": 4744 }, { "epoch": 0.3891160645657187, "grad_norm": 0.4168962240219116, "learning_rate": 7.17334380650825e-05, "loss": 2.8802, "step": 4746 }, { "epoch": 0.3892800409941071, "grad_norm": 0.43333402276039124, "learning_rate": 7.170901709081784e-05, "loss": 2.8137, "step": 4748 }, { "epoch": 0.3894440174224955, "grad_norm": 0.4718402028083801, "learning_rate": 7.168458973306776e-05, "loss": 2.8091, "step": 4750 }, { "epoch": 0.38960799385088396, "grad_norm": 0.45464134216308594, "learning_rate": 7.1660155999015e-05, "loss": 2.8456, "step": 4752 }, { "epoch": 0.38977197027927235, "grad_norm": 0.4399542212486267, "learning_rate": 7.16357158958443e-05, "loss": 2.9038, "step": 4754 }, { "epoch": 0.3899359467076608, "grad_norm": 0.4799881875514984, "learning_rate": 7.16112694307422e-05, "loss": 2.8751, "step": 4756 }, { "epoch": 0.3900999231360492, "grad_norm": 0.5272742509841919, "learning_rate": 7.158681661089714e-05, "loss": 2.8811, "step": 4758 }, { "epoch": 0.39026389956443763, "grad_norm": 0.46250781416893005, "learning_rate": 7.156235744349938e-05, "loss": 2.8629, "step": 4760 }, { "epoch": 0.390427875992826, "grad_norm": 0.46309372782707214, "learning_rate": 7.15378919357411e-05, "loss": 2.8822, "step": 4762 }, { "epoch": 0.39059185242121447, "grad_norm": 0.45474520325660706, "learning_rate": 7.15134200948163e-05, "loss": 2.8844, "step": 4764 }, { "epoch": 0.39075582884960286, "grad_norm": 0.4643316864967346, "learning_rate": 7.14889419279209e-05, "loss": 2.9392, "step": 4766 }, { "epoch": 0.3909198052779913, "grad_norm": 0.44271019101142883, "learning_rate": 7.146445744225265e-05, "loss": 2.8732, "step": 4768 }, { "epoch": 0.3910837817063797, "grad_norm": 0.4619930684566498, "learning_rate": 7.143996664501114e-05, "loss": 2.8882, "step": 4770 }, { "epoch": 0.39124775813476814, "grad_norm": 0.45898932218551636, "learning_rate": 7.141546954339782e-05, "loss": 2.8998, "step": 4772 }, { "epoch": 0.39141173456315653, "grad_norm": 0.49852630496025085, "learning_rate": 7.139096614461602e-05, "loss": 2.8935, "step": 4774 }, { "epoch": 0.391575710991545, "grad_norm": 0.4800775945186615, "learning_rate": 7.136645645587091e-05, "loss": 2.8701, "step": 4776 }, { "epoch": 0.39173968741993337, "grad_norm": 0.4955558478832245, "learning_rate": 7.13419404843695e-05, "loss": 2.8869, "step": 4778 }, { "epoch": 0.3919036638483218, "grad_norm": 0.4397711455821991, "learning_rate": 7.131741823732065e-05, "loss": 2.9377, "step": 4780 }, { "epoch": 0.3920676402767102, "grad_norm": 0.45045700669288635, "learning_rate": 7.129288972193509e-05, "loss": 2.8382, "step": 4782 }, { "epoch": 0.39223161670509865, "grad_norm": 0.4570524990558624, "learning_rate": 7.126835494542534e-05, "loss": 2.9249, "step": 4784 }, { "epoch": 0.39239559313348704, "grad_norm": 0.5094670653343201, "learning_rate": 7.124381391500584e-05, "loss": 2.8795, "step": 4786 }, { "epoch": 0.3925595695618755, "grad_norm": 0.46974891424179077, "learning_rate": 7.121926663789275e-05, "loss": 2.8413, "step": 4788 }, { "epoch": 0.3927235459902639, "grad_norm": 0.418794184923172, "learning_rate": 7.11947131213042e-05, "loss": 2.8607, "step": 4790 }, { "epoch": 0.3928875224186523, "grad_norm": 0.4206392168998718, "learning_rate": 7.117015337246008e-05, "loss": 2.8609, "step": 4792 }, { "epoch": 0.3930514988470407, "grad_norm": 0.4286406934261322, "learning_rate": 7.114558739858211e-05, "loss": 2.8891, "step": 4794 }, { "epoch": 0.39321547527542916, "grad_norm": 0.4457423686981201, "learning_rate": 7.112101520689388e-05, "loss": 2.9103, "step": 4796 }, { "epoch": 0.39337945170381755, "grad_norm": 0.4439098834991455, "learning_rate": 7.109643680462077e-05, "loss": 2.8102, "step": 4798 }, { "epoch": 0.393543428132206, "grad_norm": 0.47788435220718384, "learning_rate": 7.107185219899e-05, "loss": 2.8454, "step": 4800 }, { "epoch": 0.3937074045605944, "grad_norm": 0.4632824957370758, "learning_rate": 7.104726139723063e-05, "loss": 2.8755, "step": 4802 }, { "epoch": 0.39387138098898283, "grad_norm": 0.4338068664073944, "learning_rate": 7.102266440657351e-05, "loss": 2.797, "step": 4804 }, { "epoch": 0.3940353574173713, "grad_norm": 0.43949875235557556, "learning_rate": 7.099806123425134e-05, "loss": 2.8803, "step": 4806 }, { "epoch": 0.39419933384575967, "grad_norm": 0.4427996575832367, "learning_rate": 7.097345188749864e-05, "loss": 2.8919, "step": 4808 }, { "epoch": 0.3943633102741481, "grad_norm": 0.42919567227363586, "learning_rate": 7.094883637355169e-05, "loss": 2.8092, "step": 4810 }, { "epoch": 0.3945272867025365, "grad_norm": 0.4398113489151001, "learning_rate": 7.092421469964864e-05, "loss": 2.8571, "step": 4812 }, { "epoch": 0.39469126313092495, "grad_norm": 0.41409242153167725, "learning_rate": 7.089958687302945e-05, "loss": 2.841, "step": 4814 }, { "epoch": 0.39485523955931334, "grad_norm": 0.413713276386261, "learning_rate": 7.087495290093585e-05, "loss": 2.8935, "step": 4816 }, { "epoch": 0.3950192159877018, "grad_norm": 0.45033878087997437, "learning_rate": 7.085031279061143e-05, "loss": 2.8811, "step": 4818 }, { "epoch": 0.3951831924160902, "grad_norm": 0.4509275257587433, "learning_rate": 7.082566654930154e-05, "loss": 2.8866, "step": 4820 }, { "epoch": 0.3953471688444786, "grad_norm": 0.46417686343193054, "learning_rate": 7.080101418425336e-05, "loss": 2.889, "step": 4822 }, { "epoch": 0.395511145272867, "grad_norm": 0.440935879945755, "learning_rate": 7.077635570271583e-05, "loss": 2.8239, "step": 4824 }, { "epoch": 0.39567512170125546, "grad_norm": 0.47923141717910767, "learning_rate": 7.075169111193976e-05, "loss": 2.8989, "step": 4826 }, { "epoch": 0.39583909812964385, "grad_norm": 0.45613157749176025, "learning_rate": 7.072702041917769e-05, "loss": 2.8212, "step": 4828 }, { "epoch": 0.3960030745580323, "grad_norm": 0.4167928695678711, "learning_rate": 7.0702343631684e-05, "loss": 2.853, "step": 4830 }, { "epoch": 0.3961670509864207, "grad_norm": 0.4974370300769806, "learning_rate": 7.06776607567148e-05, "loss": 2.8654, "step": 4832 }, { "epoch": 0.39633102741480913, "grad_norm": 0.5273467898368835, "learning_rate": 7.065297180152808e-05, "loss": 2.8749, "step": 4834 }, { "epoch": 0.3964950038431975, "grad_norm": 0.5259729623794556, "learning_rate": 7.062827677338354e-05, "loss": 2.8803, "step": 4836 }, { "epoch": 0.39665898027158597, "grad_norm": 0.6957345008850098, "learning_rate": 7.060357567954269e-05, "loss": 2.8754, "step": 4838 }, { "epoch": 0.39682295669997436, "grad_norm": 0.5111395716667175, "learning_rate": 7.057886852726886e-05, "loss": 2.8317, "step": 4840 }, { "epoch": 0.3969869331283628, "grad_norm": 0.5050389766693115, "learning_rate": 7.05541553238271e-05, "loss": 2.8951, "step": 4842 }, { "epoch": 0.3971509095567512, "grad_norm": 0.4891645014286041, "learning_rate": 7.052943607648428e-05, "loss": 2.8765, "step": 4844 }, { "epoch": 0.39731488598513964, "grad_norm": 0.5061144232749939, "learning_rate": 7.050471079250904e-05, "loss": 2.8982, "step": 4846 }, { "epoch": 0.39747886241352803, "grad_norm": 0.4700469374656677, "learning_rate": 7.047997947917177e-05, "loss": 2.9003, "step": 4848 }, { "epoch": 0.3976428388419165, "grad_norm": 0.5077940225601196, "learning_rate": 7.045524214374469e-05, "loss": 2.881, "step": 4850 }, { "epoch": 0.39780681527030487, "grad_norm": 0.48411741852760315, "learning_rate": 7.043049879350174e-05, "loss": 2.8381, "step": 4852 }, { "epoch": 0.3979707916986933, "grad_norm": 0.4596063196659088, "learning_rate": 7.040574943571864e-05, "loss": 2.8967, "step": 4854 }, { "epoch": 0.3981347681270817, "grad_norm": 0.45511674880981445, "learning_rate": 7.03809940776729e-05, "loss": 2.8329, "step": 4856 }, { "epoch": 0.39829874455547015, "grad_norm": 0.4465174376964569, "learning_rate": 7.035623272664373e-05, "loss": 2.8677, "step": 4858 }, { "epoch": 0.39846272098385854, "grad_norm": 0.4612562954425812, "learning_rate": 7.033146538991218e-05, "loss": 2.864, "step": 4860 }, { "epoch": 0.398626697412247, "grad_norm": 0.44819965958595276, "learning_rate": 7.030669207476103e-05, "loss": 2.8707, "step": 4862 }, { "epoch": 0.39879067384063543, "grad_norm": 0.48881852626800537, "learning_rate": 7.02819127884748e-05, "loss": 2.8788, "step": 4864 }, { "epoch": 0.3989546502690238, "grad_norm": 0.4424434304237366, "learning_rate": 7.025712753833978e-05, "loss": 2.9113, "step": 4866 }, { "epoch": 0.39911862669741227, "grad_norm": 0.49569734930992126, "learning_rate": 7.023233633164403e-05, "loss": 2.8788, "step": 4868 }, { "epoch": 0.39928260312580066, "grad_norm": 0.49903231859207153, "learning_rate": 7.020753917567735e-05, "loss": 2.8644, "step": 4870 }, { "epoch": 0.3994465795541891, "grad_norm": 0.4598727226257324, "learning_rate": 7.018273607773126e-05, "loss": 2.8747, "step": 4872 }, { "epoch": 0.3996105559825775, "grad_norm": 0.4078742563724518, "learning_rate": 7.015792704509906e-05, "loss": 2.861, "step": 4874 }, { "epoch": 0.39977453241096594, "grad_norm": 0.44463399052619934, "learning_rate": 7.013311208507581e-05, "loss": 2.8387, "step": 4876 }, { "epoch": 0.39993850883935433, "grad_norm": 0.4435771703720093, "learning_rate": 7.010829120495827e-05, "loss": 2.8086, "step": 4878 }, { "epoch": 0.4001024852677428, "grad_norm": 0.45103517174720764, "learning_rate": 7.008346441204497e-05, "loss": 2.9025, "step": 4880 }, { "epoch": 0.40026646169613117, "grad_norm": 0.45612624287605286, "learning_rate": 7.005863171363615e-05, "loss": 2.8465, "step": 4882 }, { "epoch": 0.4004304381245196, "grad_norm": 0.4706539809703827, "learning_rate": 7.003379311703384e-05, "loss": 2.8552, "step": 4884 }, { "epoch": 0.400594414552908, "grad_norm": 0.42342978715896606, "learning_rate": 7.000894862954175e-05, "loss": 2.8573, "step": 4886 }, { "epoch": 0.40075839098129645, "grad_norm": 0.41396844387054443, "learning_rate": 6.998409825846534e-05, "loss": 2.7617, "step": 4888 }, { "epoch": 0.40092236740968484, "grad_norm": 0.42022252082824707, "learning_rate": 6.995924201111182e-05, "loss": 2.8342, "step": 4890 }, { "epoch": 0.4010863438380733, "grad_norm": 0.42854151129722595, "learning_rate": 6.993437989479008e-05, "loss": 2.8815, "step": 4892 }, { "epoch": 0.4012503202664617, "grad_norm": 0.4279215633869171, "learning_rate": 6.99095119168108e-05, "loss": 2.8604, "step": 4894 }, { "epoch": 0.4014142966948501, "grad_norm": 0.42381319403648376, "learning_rate": 6.988463808448635e-05, "loss": 2.8371, "step": 4896 }, { "epoch": 0.4015782731232385, "grad_norm": 0.4339781403541565, "learning_rate": 6.985975840513082e-05, "loss": 2.8265, "step": 4898 }, { "epoch": 0.40174224955162696, "grad_norm": 0.44247475266456604, "learning_rate": 6.983487288605998e-05, "loss": 2.87, "step": 4900 }, { "epoch": 0.40190622598001535, "grad_norm": 0.46390461921691895, "learning_rate": 6.98099815345914e-05, "loss": 2.8553, "step": 4902 }, { "epoch": 0.4020702024084038, "grad_norm": 0.45721256732940674, "learning_rate": 6.978508435804432e-05, "loss": 2.8543, "step": 4904 }, { "epoch": 0.4022341788367922, "grad_norm": 0.4119372069835663, "learning_rate": 6.976018136373968e-05, "loss": 2.8354, "step": 4906 }, { "epoch": 0.40239815526518063, "grad_norm": 0.4009445905685425, "learning_rate": 6.973527255900017e-05, "loss": 2.9314, "step": 4908 }, { "epoch": 0.402562131693569, "grad_norm": 0.40952831506729126, "learning_rate": 6.971035795115015e-05, "loss": 2.8343, "step": 4910 }, { "epoch": 0.40272610812195747, "grad_norm": 0.4430871307849884, "learning_rate": 6.96854375475157e-05, "loss": 2.8187, "step": 4912 }, { "epoch": 0.40289008455034586, "grad_norm": 0.48836904764175415, "learning_rate": 6.966051135542462e-05, "loss": 2.8811, "step": 4914 }, { "epoch": 0.4030540609787343, "grad_norm": 0.48596030473709106, "learning_rate": 6.963557938220638e-05, "loss": 2.8653, "step": 4916 }, { "epoch": 0.40321803740712275, "grad_norm": 0.5084185004234314, "learning_rate": 6.961064163519217e-05, "loss": 2.8678, "step": 4918 }, { "epoch": 0.40338201383551114, "grad_norm": 0.48467105627059937, "learning_rate": 6.958569812171488e-05, "loss": 2.8321, "step": 4920 }, { "epoch": 0.4035459902638996, "grad_norm": 0.455792099237442, "learning_rate": 6.95607488491091e-05, "loss": 2.8788, "step": 4922 }, { "epoch": 0.403709966692288, "grad_norm": 0.4418245851993561, "learning_rate": 6.95357938247111e-05, "loss": 2.8572, "step": 4924 }, { "epoch": 0.4038739431206764, "grad_norm": 0.44264933466911316, "learning_rate": 6.951083305585886e-05, "loss": 2.8806, "step": 4926 }, { "epoch": 0.4040379195490648, "grad_norm": 0.5314618349075317, "learning_rate": 6.9485866549892e-05, "loss": 2.898, "step": 4928 }, { "epoch": 0.40420189597745326, "grad_norm": 0.5305108428001404, "learning_rate": 6.946089431415188e-05, "loss": 2.8387, "step": 4930 }, { "epoch": 0.40436587240584165, "grad_norm": 0.47681617736816406, "learning_rate": 6.943591635598155e-05, "loss": 2.861, "step": 4932 }, { "epoch": 0.4045298488342301, "grad_norm": 0.4332691431045532, "learning_rate": 6.941093268272568e-05, "loss": 2.8346, "step": 4934 }, { "epoch": 0.4046938252626185, "grad_norm": 0.4275270998477936, "learning_rate": 6.93859433017307e-05, "loss": 2.8146, "step": 4936 }, { "epoch": 0.40485780169100694, "grad_norm": 0.443386048078537, "learning_rate": 6.936094822034465e-05, "loss": 2.8273, "step": 4938 }, { "epoch": 0.4050217781193953, "grad_norm": 0.406711220741272, "learning_rate": 6.933594744591727e-05, "loss": 2.8644, "step": 4940 }, { "epoch": 0.40518575454778377, "grad_norm": 0.4228709042072296, "learning_rate": 6.931094098580002e-05, "loss": 2.7865, "step": 4942 }, { "epoch": 0.40534973097617216, "grad_norm": 0.4386141002178192, "learning_rate": 6.928592884734595e-05, "loss": 2.8248, "step": 4944 }, { "epoch": 0.4055137074045606, "grad_norm": 0.449709415435791, "learning_rate": 6.926091103790984e-05, "loss": 2.8646, "step": 4946 }, { "epoch": 0.405677683832949, "grad_norm": 0.43006017804145813, "learning_rate": 6.923588756484808e-05, "loss": 2.8325, "step": 4948 }, { "epoch": 0.40584166026133744, "grad_norm": 0.43961092829704285, "learning_rate": 6.921085843551885e-05, "loss": 2.8412, "step": 4950 }, { "epoch": 0.40600563668972584, "grad_norm": 0.4436211884021759, "learning_rate": 6.91858236572818e-05, "loss": 2.896, "step": 4952 }, { "epoch": 0.4061696131181143, "grad_norm": 0.46775439381599426, "learning_rate": 6.916078323749844e-05, "loss": 2.8587, "step": 4954 }, { "epoch": 0.40633358954650267, "grad_norm": 0.43751874566078186, "learning_rate": 6.91357371835318e-05, "loss": 2.8989, "step": 4956 }, { "epoch": 0.4064975659748911, "grad_norm": 0.4038192331790924, "learning_rate": 6.91106855027466e-05, "loss": 2.829, "step": 4958 }, { "epoch": 0.4066615424032795, "grad_norm": 0.4433155655860901, "learning_rate": 6.908562820250925e-05, "loss": 2.845, "step": 4960 }, { "epoch": 0.40682551883166795, "grad_norm": 0.3981752097606659, "learning_rate": 6.906056529018782e-05, "loss": 2.834, "step": 4962 }, { "epoch": 0.40698949526005634, "grad_norm": 0.4669404625892639, "learning_rate": 6.903549677315194e-05, "loss": 2.8514, "step": 4964 }, { "epoch": 0.4071534716884448, "grad_norm": 0.42662471532821655, "learning_rate": 6.901042265877299e-05, "loss": 2.8629, "step": 4966 }, { "epoch": 0.4073174481168332, "grad_norm": 0.45783746242523193, "learning_rate": 6.898534295442394e-05, "loss": 2.8507, "step": 4968 }, { "epoch": 0.4074814245452216, "grad_norm": 0.4851702153682709, "learning_rate": 6.896025766747941e-05, "loss": 2.8843, "step": 4970 }, { "epoch": 0.40764540097361, "grad_norm": 0.4829177260398865, "learning_rate": 6.893516680531568e-05, "loss": 2.8818, "step": 4972 }, { "epoch": 0.40780937740199846, "grad_norm": 0.4882843494415283, "learning_rate": 6.891007037531067e-05, "loss": 2.8301, "step": 4974 }, { "epoch": 0.4079733538303869, "grad_norm": 0.4679323732852936, "learning_rate": 6.888496838484391e-05, "loss": 2.8365, "step": 4976 }, { "epoch": 0.4081373302587753, "grad_norm": 0.4670538902282715, "learning_rate": 6.885986084129657e-05, "loss": 2.7802, "step": 4978 }, { "epoch": 0.40830130668716375, "grad_norm": 0.4513445198535919, "learning_rate": 6.88347477520515e-05, "loss": 2.8893, "step": 4980 }, { "epoch": 0.40846528311555214, "grad_norm": 0.4539678394794464, "learning_rate": 6.880962912449313e-05, "loss": 2.8921, "step": 4982 }, { "epoch": 0.4086292595439406, "grad_norm": 0.46022936701774597, "learning_rate": 6.87845049660075e-05, "loss": 2.8595, "step": 4984 }, { "epoch": 0.40879323597232897, "grad_norm": 0.4774166941642761, "learning_rate": 6.875937528398237e-05, "loss": 2.8123, "step": 4986 }, { "epoch": 0.4089572124007174, "grad_norm": 0.45405611395835876, "learning_rate": 6.873424008580701e-05, "loss": 2.8075, "step": 4988 }, { "epoch": 0.4091211888291058, "grad_norm": 0.45312246680259705, "learning_rate": 6.87090993788724e-05, "loss": 2.8432, "step": 4990 }, { "epoch": 0.40928516525749425, "grad_norm": 0.4497641324996948, "learning_rate": 6.86839531705711e-05, "loss": 2.8694, "step": 4992 }, { "epoch": 0.40944914168588264, "grad_norm": 0.49629077315330505, "learning_rate": 6.865880146829727e-05, "loss": 2.8581, "step": 4994 }, { "epoch": 0.4096131181142711, "grad_norm": 0.4826986491680145, "learning_rate": 6.863364427944673e-05, "loss": 2.8362, "step": 4996 }, { "epoch": 0.4097770945426595, "grad_norm": 0.4984830617904663, "learning_rate": 6.86084816114169e-05, "loss": 2.8219, "step": 4998 }, { "epoch": 0.4099410709710479, "grad_norm": 0.48046591877937317, "learning_rate": 6.858331347160678e-05, "loss": 2.8906, "step": 5000 }, { "epoch": 0.4101050473994363, "grad_norm": 0.43370234966278076, "learning_rate": 6.855813986741701e-05, "loss": 2.7766, "step": 5002 }, { "epoch": 0.41026902382782476, "grad_norm": 0.4225366413593292, "learning_rate": 6.853296080624984e-05, "loss": 2.8594, "step": 5004 }, { "epoch": 0.41043300025621315, "grad_norm": 0.4410724639892578, "learning_rate": 6.85077762955091e-05, "loss": 2.7648, "step": 5006 }, { "epoch": 0.4105969766846016, "grad_norm": 0.47217485308647156, "learning_rate": 6.848258634260026e-05, "loss": 2.8507, "step": 5008 }, { "epoch": 0.41076095311299, "grad_norm": 0.45836207270622253, "learning_rate": 6.845739095493033e-05, "loss": 2.8208, "step": 5010 }, { "epoch": 0.41092492954137844, "grad_norm": 0.44076651334762573, "learning_rate": 6.843219013990801e-05, "loss": 2.8757, "step": 5012 }, { "epoch": 0.4110889059697668, "grad_norm": 0.4325857162475586, "learning_rate": 6.840698390494346e-05, "loss": 2.8759, "step": 5014 }, { "epoch": 0.4112528823981553, "grad_norm": 0.43539878726005554, "learning_rate": 6.838177225744859e-05, "loss": 2.8535, "step": 5016 }, { "epoch": 0.41141685882654366, "grad_norm": 0.4194115102291107, "learning_rate": 6.835655520483677e-05, "loss": 2.8644, "step": 5018 }, { "epoch": 0.4115808352549321, "grad_norm": 0.4883398711681366, "learning_rate": 6.833133275452305e-05, "loss": 2.843, "step": 5020 }, { "epoch": 0.4117448116833205, "grad_norm": 0.462043821811676, "learning_rate": 6.830610491392403e-05, "loss": 2.8797, "step": 5022 }, { "epoch": 0.41190878811170895, "grad_norm": 0.4517926871776581, "learning_rate": 6.828087169045788e-05, "loss": 2.8508, "step": 5024 }, { "epoch": 0.41207276454009734, "grad_norm": 0.4353565573692322, "learning_rate": 6.825563309154437e-05, "loss": 2.8581, "step": 5026 }, { "epoch": 0.4122367409684858, "grad_norm": 0.4237254858016968, "learning_rate": 6.823038912460488e-05, "loss": 2.8593, "step": 5028 }, { "epoch": 0.4124007173968742, "grad_norm": 0.4407292902469635, "learning_rate": 6.820513979706232e-05, "loss": 2.8378, "step": 5030 }, { "epoch": 0.4125646938252626, "grad_norm": 0.4466221332550049, "learning_rate": 6.817988511634117e-05, "loss": 2.8888, "step": 5032 }, { "epoch": 0.41272867025365106, "grad_norm": 0.454248309135437, "learning_rate": 6.815462508986755e-05, "loss": 2.9008, "step": 5034 }, { "epoch": 0.41289264668203945, "grad_norm": 0.415745347738266, "learning_rate": 6.812935972506909e-05, "loss": 2.8414, "step": 5036 }, { "epoch": 0.4130566231104279, "grad_norm": 0.43440842628479004, "learning_rate": 6.810408902937503e-05, "loss": 2.8519, "step": 5038 }, { "epoch": 0.4132205995388163, "grad_norm": 0.42548617720603943, "learning_rate": 6.807881301021614e-05, "loss": 2.8922, "step": 5040 }, { "epoch": 0.41338457596720474, "grad_norm": 0.4431571066379547, "learning_rate": 6.805353167502476e-05, "loss": 2.8186, "step": 5042 }, { "epoch": 0.4135485523955931, "grad_norm": 0.4349539577960968, "learning_rate": 6.802824503123484e-05, "loss": 2.8589, "step": 5044 }, { "epoch": 0.4137125288239816, "grad_norm": 0.46239006519317627, "learning_rate": 6.800295308628186e-05, "loss": 2.8455, "step": 5046 }, { "epoch": 0.41387650525236996, "grad_norm": 0.41889017820358276, "learning_rate": 6.79776558476028e-05, "loss": 2.8039, "step": 5048 }, { "epoch": 0.4140404816807584, "grad_norm": 0.4747699201107025, "learning_rate": 6.795235332263631e-05, "loss": 2.8786, "step": 5050 }, { "epoch": 0.4142044581091468, "grad_norm": 0.4729340672492981, "learning_rate": 6.792704551882255e-05, "loss": 2.8934, "step": 5052 }, { "epoch": 0.41436843453753525, "grad_norm": 0.4421963691711426, "learning_rate": 6.790173244360318e-05, "loss": 2.8233, "step": 5054 }, { "epoch": 0.41453241096592364, "grad_norm": 0.46226418018341064, "learning_rate": 6.787641410442146e-05, "loss": 2.8198, "step": 5056 }, { "epoch": 0.4146963873943121, "grad_norm": 0.47469133138656616, "learning_rate": 6.785109050872218e-05, "loss": 2.9181, "step": 5058 }, { "epoch": 0.4148603638227005, "grad_norm": 0.4686332643032074, "learning_rate": 6.782576166395171e-05, "loss": 2.8448, "step": 5060 }, { "epoch": 0.4150243402510889, "grad_norm": 0.4484609067440033, "learning_rate": 6.780042757755791e-05, "loss": 2.8942, "step": 5062 }, { "epoch": 0.4151883166794773, "grad_norm": 0.416677325963974, "learning_rate": 6.777508825699024e-05, "loss": 2.8765, "step": 5064 }, { "epoch": 0.41535229310786576, "grad_norm": 0.4481672942638397, "learning_rate": 6.774974370969964e-05, "loss": 2.893, "step": 5066 }, { "epoch": 0.41551626953625415, "grad_norm": 0.4461904466152191, "learning_rate": 6.772439394313861e-05, "loss": 2.799, "step": 5068 }, { "epoch": 0.4156802459646426, "grad_norm": 0.45392054319381714, "learning_rate": 6.769903896476122e-05, "loss": 2.8791, "step": 5070 }, { "epoch": 0.415844222393031, "grad_norm": 0.4950689971446991, "learning_rate": 6.767367878202302e-05, "loss": 2.8379, "step": 5072 }, { "epoch": 0.41600819882141943, "grad_norm": 0.4642818868160248, "learning_rate": 6.764831340238111e-05, "loss": 2.8984, "step": 5074 }, { "epoch": 0.4161721752498078, "grad_norm": 0.44757944345474243, "learning_rate": 6.762294283329413e-05, "loss": 2.8848, "step": 5076 }, { "epoch": 0.41633615167819626, "grad_norm": 0.4181385040283203, "learning_rate": 6.759756708222224e-05, "loss": 2.8353, "step": 5078 }, { "epoch": 0.41650012810658466, "grad_norm": 0.4409900903701782, "learning_rate": 6.757218615662711e-05, "loss": 2.8491, "step": 5080 }, { "epoch": 0.4166641045349731, "grad_norm": 0.48207783699035645, "learning_rate": 6.754680006397193e-05, "loss": 2.854, "step": 5082 }, { "epoch": 0.4168280809633615, "grad_norm": 0.44279900193214417, "learning_rate": 6.752140881172146e-05, "loss": 2.8975, "step": 5084 }, { "epoch": 0.41699205739174994, "grad_norm": 0.4284610450267792, "learning_rate": 6.74960124073419e-05, "loss": 2.9468, "step": 5086 }, { "epoch": 0.41715603382013833, "grad_norm": 0.4538637697696686, "learning_rate": 6.747061085830102e-05, "loss": 2.9049, "step": 5088 }, { "epoch": 0.4173200102485268, "grad_norm": 0.4139115512371063, "learning_rate": 6.744520417206808e-05, "loss": 2.7537, "step": 5090 }, { "epoch": 0.4174839866769152, "grad_norm": 0.41832759976387024, "learning_rate": 6.741979235611384e-05, "loss": 2.831, "step": 5092 }, { "epoch": 0.4176479631053036, "grad_norm": 0.4567616283893585, "learning_rate": 6.739437541791062e-05, "loss": 2.8572, "step": 5094 }, { "epoch": 0.41781193953369206, "grad_norm": 0.45696723461151123, "learning_rate": 6.73689533649322e-05, "loss": 2.8366, "step": 5096 }, { "epoch": 0.41797591596208045, "grad_norm": 0.46061649918556213, "learning_rate": 6.734352620465386e-05, "loss": 2.902, "step": 5098 }, { "epoch": 0.4181398923904689, "grad_norm": 0.445056676864624, "learning_rate": 6.731809394455242e-05, "loss": 2.8579, "step": 5100 }, { "epoch": 0.4183038688188573, "grad_norm": 0.4230596125125885, "learning_rate": 6.729265659210614e-05, "loss": 2.8865, "step": 5102 }, { "epoch": 0.41846784524724573, "grad_norm": 0.4370947480201721, "learning_rate": 6.726721415479485e-05, "loss": 2.8129, "step": 5104 }, { "epoch": 0.4186318216756341, "grad_norm": 0.4428729712963104, "learning_rate": 6.724176664009983e-05, "loss": 2.818, "step": 5106 }, { "epoch": 0.41879579810402257, "grad_norm": 0.45244166254997253, "learning_rate": 6.721631405550385e-05, "loss": 2.8146, "step": 5108 }, { "epoch": 0.41895977453241096, "grad_norm": 0.44132840633392334, "learning_rate": 6.719085640849119e-05, "loss": 2.8328, "step": 5110 }, { "epoch": 0.4191237509607994, "grad_norm": 0.4221113324165344, "learning_rate": 6.716539370654761e-05, "loss": 2.8828, "step": 5112 }, { "epoch": 0.4192877273891878, "grad_norm": 0.4474412202835083, "learning_rate": 6.713992595716035e-05, "loss": 2.7689, "step": 5114 }, { "epoch": 0.41945170381757624, "grad_norm": 0.4391607642173767, "learning_rate": 6.711445316781815e-05, "loss": 2.8365, "step": 5116 }, { "epoch": 0.41961568024596463, "grad_norm": 0.41488832235336304, "learning_rate": 6.708897534601124e-05, "loss": 2.8563, "step": 5118 }, { "epoch": 0.4197796566743531, "grad_norm": 0.4629252851009369, "learning_rate": 6.706349249923129e-05, "loss": 2.8557, "step": 5120 }, { "epoch": 0.41994363310274146, "grad_norm": 0.4906052052974701, "learning_rate": 6.703800463497147e-05, "loss": 2.8408, "step": 5122 }, { "epoch": 0.4201076095311299, "grad_norm": 0.42281031608581543, "learning_rate": 6.701251176072645e-05, "loss": 2.8541, "step": 5124 }, { "epoch": 0.4202715859595183, "grad_norm": 0.4068866968154907, "learning_rate": 6.698701388399232e-05, "loss": 2.8388, "step": 5126 }, { "epoch": 0.42043556238790675, "grad_norm": 0.39713165163993835, "learning_rate": 6.696151101226669e-05, "loss": 2.8556, "step": 5128 }, { "epoch": 0.42059953881629514, "grad_norm": 0.4381765127182007, "learning_rate": 6.693600315304863e-05, "loss": 2.8704, "step": 5130 }, { "epoch": 0.4207635152446836, "grad_norm": 0.46095380187034607, "learning_rate": 6.691049031383864e-05, "loss": 2.8452, "step": 5132 }, { "epoch": 0.420927491673072, "grad_norm": 0.44617173075675964, "learning_rate": 6.68849725021387e-05, "loss": 2.8298, "step": 5134 }, { "epoch": 0.4210914681014604, "grad_norm": 0.407774418592453, "learning_rate": 6.685944972545233e-05, "loss": 2.8568, "step": 5136 }, { "epoch": 0.4212554445298488, "grad_norm": 0.40078461170196533, "learning_rate": 6.683392199128436e-05, "loss": 2.8673, "step": 5138 }, { "epoch": 0.42141942095823726, "grad_norm": 0.4423627555370331, "learning_rate": 6.68083893071412e-05, "loss": 2.7988, "step": 5140 }, { "epoch": 0.42158339738662565, "grad_norm": 0.4524652659893036, "learning_rate": 6.678285168053069e-05, "loss": 2.8361, "step": 5142 }, { "epoch": 0.4217473738150141, "grad_norm": 0.4723981022834778, "learning_rate": 6.675730911896209e-05, "loss": 2.7871, "step": 5144 }, { "epoch": 0.4219113502434025, "grad_norm": 0.4568463861942291, "learning_rate": 6.673176162994612e-05, "loss": 2.8334, "step": 5146 }, { "epoch": 0.42207532667179093, "grad_norm": 0.4477904736995697, "learning_rate": 6.670620922099497e-05, "loss": 2.7981, "step": 5148 }, { "epoch": 0.4222393031001794, "grad_norm": 0.40692979097366333, "learning_rate": 6.668065189962229e-05, "loss": 2.8587, "step": 5150 }, { "epoch": 0.42240327952856777, "grad_norm": 0.4016028046607971, "learning_rate": 6.665508967334311e-05, "loss": 2.9009, "step": 5152 }, { "epoch": 0.4225672559569562, "grad_norm": 0.43338680267333984, "learning_rate": 6.662952254967396e-05, "loss": 2.8658, "step": 5154 }, { "epoch": 0.4227312323853446, "grad_norm": 0.481141597032547, "learning_rate": 6.660395053613278e-05, "loss": 2.8064, "step": 5156 }, { "epoch": 0.42289520881373305, "grad_norm": 0.46163663268089294, "learning_rate": 6.657837364023899e-05, "loss": 2.8979, "step": 5158 }, { "epoch": 0.42305918524212144, "grad_norm": 0.41502735018730164, "learning_rate": 6.65527918695134e-05, "loss": 2.907, "step": 5160 }, { "epoch": 0.4232231616705099, "grad_norm": 0.44523778557777405, "learning_rate": 6.652720523147826e-05, "loss": 2.8417, "step": 5162 }, { "epoch": 0.4233871380988983, "grad_norm": 0.40618014335632324, "learning_rate": 6.650161373365726e-05, "loss": 2.8888, "step": 5164 }, { "epoch": 0.4235511145272867, "grad_norm": 0.4037351608276367, "learning_rate": 6.647601738357555e-05, "loss": 2.8301, "step": 5166 }, { "epoch": 0.4237150909556751, "grad_norm": 0.42846325039863586, "learning_rate": 6.645041618875965e-05, "loss": 2.8354, "step": 5168 }, { "epoch": 0.42387906738406356, "grad_norm": 0.4747539758682251, "learning_rate": 6.642481015673751e-05, "loss": 2.8743, "step": 5170 }, { "epoch": 0.42404304381245195, "grad_norm": 0.44995033740997314, "learning_rate": 6.63991992950386e-05, "loss": 2.842, "step": 5172 }, { "epoch": 0.4242070202408404, "grad_norm": 0.440548300743103, "learning_rate": 6.637358361119366e-05, "loss": 2.8511, "step": 5174 }, { "epoch": 0.4243709966692288, "grad_norm": 0.4418390691280365, "learning_rate": 6.634796311273493e-05, "loss": 2.8331, "step": 5176 }, { "epoch": 0.42453497309761723, "grad_norm": 0.44166362285614014, "learning_rate": 6.63223378071961e-05, "loss": 2.7898, "step": 5178 }, { "epoch": 0.4246989495260056, "grad_norm": 0.45935314893722534, "learning_rate": 6.629670770211218e-05, "loss": 2.8341, "step": 5180 }, { "epoch": 0.42486292595439407, "grad_norm": 0.45904242992401123, "learning_rate": 6.627107280501968e-05, "loss": 2.8524, "step": 5182 }, { "epoch": 0.42502690238278246, "grad_norm": 0.4371541142463684, "learning_rate": 6.624543312345645e-05, "loss": 2.8115, "step": 5184 }, { "epoch": 0.4251908788111709, "grad_norm": 0.4144454598426819, "learning_rate": 6.621978866496181e-05, "loss": 2.8042, "step": 5186 }, { "epoch": 0.4253548552395593, "grad_norm": 0.42282989621162415, "learning_rate": 6.619413943707642e-05, "loss": 2.8102, "step": 5188 }, { "epoch": 0.42551883166794774, "grad_norm": 0.3988903760910034, "learning_rate": 6.616848544734243e-05, "loss": 2.8422, "step": 5190 }, { "epoch": 0.42568280809633613, "grad_norm": 0.4397544264793396, "learning_rate": 6.614282670330327e-05, "loss": 2.8699, "step": 5192 }, { "epoch": 0.4258467845247246, "grad_norm": 0.4363107681274414, "learning_rate": 6.611716321250387e-05, "loss": 2.8547, "step": 5194 }, { "epoch": 0.42601076095311297, "grad_norm": 0.4343310594558716, "learning_rate": 6.609149498249052e-05, "loss": 2.7935, "step": 5196 }, { "epoch": 0.4261747373815014, "grad_norm": 0.40959495306015015, "learning_rate": 6.606582202081089e-05, "loss": 2.8412, "step": 5198 }, { "epoch": 0.4263387138098898, "grad_norm": 0.40372273325920105, "learning_rate": 6.604014433501404e-05, "loss": 2.8454, "step": 5200 }, { "epoch": 0.42650269023827825, "grad_norm": 0.46662813425064087, "learning_rate": 6.601446193265048e-05, "loss": 2.8771, "step": 5202 }, { "epoch": 0.4266666666666667, "grad_norm": 0.42457115650177, "learning_rate": 6.598877482127201e-05, "loss": 2.8032, "step": 5204 }, { "epoch": 0.4268306430950551, "grad_norm": 0.5101222991943359, "learning_rate": 6.596308300843188e-05, "loss": 2.8584, "step": 5206 }, { "epoch": 0.42699461952344353, "grad_norm": 0.40872541069984436, "learning_rate": 6.593738650168473e-05, "loss": 2.9095, "step": 5208 }, { "epoch": 0.4271585959518319, "grad_norm": 0.46287456154823303, "learning_rate": 6.591168530858653e-05, "loss": 2.84, "step": 5210 }, { "epoch": 0.42732257238022037, "grad_norm": 0.4474398195743561, "learning_rate": 6.588597943669465e-05, "loss": 2.8481, "step": 5212 }, { "epoch": 0.42748654880860876, "grad_norm": 0.444132536649704, "learning_rate": 6.586026889356789e-05, "loss": 2.8654, "step": 5214 }, { "epoch": 0.4276505252369972, "grad_norm": 0.4312966465950012, "learning_rate": 6.583455368676632e-05, "loss": 2.8346, "step": 5216 }, { "epoch": 0.4278145016653856, "grad_norm": 0.43802088499069214, "learning_rate": 6.580883382385148e-05, "loss": 2.8654, "step": 5218 }, { "epoch": 0.42797847809377404, "grad_norm": 0.47087231278419495, "learning_rate": 6.578310931238619e-05, "loss": 2.8939, "step": 5220 }, { "epoch": 0.42814245452216243, "grad_norm": 0.4582447111606598, "learning_rate": 6.575738015993473e-05, "loss": 2.8367, "step": 5222 }, { "epoch": 0.4283064309505509, "grad_norm": 0.4460625648498535, "learning_rate": 6.573164637406264e-05, "loss": 2.7906, "step": 5224 }, { "epoch": 0.42847040737893927, "grad_norm": 0.4364650547504425, "learning_rate": 6.570590796233693e-05, "loss": 2.8239, "step": 5226 }, { "epoch": 0.4286343838073277, "grad_norm": 0.4625886082649231, "learning_rate": 6.568016493232589e-05, "loss": 2.8592, "step": 5228 }, { "epoch": 0.4287983602357161, "grad_norm": 0.4330351948738098, "learning_rate": 6.565441729159923e-05, "loss": 2.8348, "step": 5230 }, { "epoch": 0.42896233666410455, "grad_norm": 0.42922812700271606, "learning_rate": 6.562866504772795e-05, "loss": 2.8507, "step": 5232 }, { "epoch": 0.42912631309249294, "grad_norm": 0.4726974070072174, "learning_rate": 6.560290820828443e-05, "loss": 2.8718, "step": 5234 }, { "epoch": 0.4292902895208814, "grad_norm": 0.42944204807281494, "learning_rate": 6.557714678084243e-05, "loss": 2.7942, "step": 5236 }, { "epoch": 0.4294542659492698, "grad_norm": 0.41010624170303345, "learning_rate": 6.555138077297707e-05, "loss": 2.8685, "step": 5238 }, { "epoch": 0.4296182423776582, "grad_norm": 0.4391094148159027, "learning_rate": 6.552561019226471e-05, "loss": 2.844, "step": 5240 }, { "epoch": 0.4297822188060466, "grad_norm": 0.4501963257789612, "learning_rate": 6.549983504628318e-05, "loss": 2.7881, "step": 5242 }, { "epoch": 0.42994619523443506, "grad_norm": 0.4697725176811218, "learning_rate": 6.54740553426116e-05, "loss": 2.8676, "step": 5244 }, { "epoch": 0.43011017166282345, "grad_norm": 0.46249881386756897, "learning_rate": 6.544827108883041e-05, "loss": 2.8892, "step": 5246 }, { "epoch": 0.4302741480912119, "grad_norm": 0.414722740650177, "learning_rate": 6.542248229252139e-05, "loss": 2.8313, "step": 5248 }, { "epoch": 0.4304381245196003, "grad_norm": 0.43396633863449097, "learning_rate": 6.539668896126774e-05, "loss": 2.8321, "step": 5250 }, { "epoch": 0.43060210094798873, "grad_norm": 0.44429492950439453, "learning_rate": 6.537089110265387e-05, "loss": 2.8523, "step": 5252 }, { "epoch": 0.4307660773763771, "grad_norm": 0.46500155329704285, "learning_rate": 6.53450887242656e-05, "loss": 2.7915, "step": 5254 }, { "epoch": 0.43093005380476557, "grad_norm": 0.5475171804428101, "learning_rate": 6.531928183369008e-05, "loss": 2.8314, "step": 5256 }, { "epoch": 0.43109403023315396, "grad_norm": 0.5729891657829285, "learning_rate": 6.529347043851573e-05, "loss": 2.8863, "step": 5258 }, { "epoch": 0.4312580066615424, "grad_norm": 0.4980946481227875, "learning_rate": 6.526765454633235e-05, "loss": 2.8636, "step": 5260 }, { "epoch": 0.43142198308993085, "grad_norm": 0.4520968198776245, "learning_rate": 6.524183416473103e-05, "loss": 2.8461, "step": 5262 }, { "epoch": 0.43158595951831924, "grad_norm": 0.38656288385391235, "learning_rate": 6.52160093013042e-05, "loss": 2.8186, "step": 5264 }, { "epoch": 0.4317499359467077, "grad_norm": 0.39334821701049805, "learning_rate": 6.519017996364562e-05, "loss": 2.8716, "step": 5266 }, { "epoch": 0.4319139123750961, "grad_norm": 0.41815948486328125, "learning_rate": 6.51643461593503e-05, "loss": 2.804, "step": 5268 }, { "epoch": 0.4320778888034845, "grad_norm": 0.48571309447288513, "learning_rate": 6.513850789601466e-05, "loss": 2.8229, "step": 5270 }, { "epoch": 0.4322418652318729, "grad_norm": 0.46701404452323914, "learning_rate": 6.511266518123633e-05, "loss": 2.8247, "step": 5272 }, { "epoch": 0.43240584166026136, "grad_norm": 0.46157705783843994, "learning_rate": 6.508681802261435e-05, "loss": 2.842, "step": 5274 }, { "epoch": 0.43256981808864975, "grad_norm": 0.4298838675022125, "learning_rate": 6.506096642774897e-05, "loss": 2.8514, "step": 5276 }, { "epoch": 0.4327337945170382, "grad_norm": 0.43288126587867737, "learning_rate": 6.503511040424182e-05, "loss": 2.8294, "step": 5278 }, { "epoch": 0.4328977709454266, "grad_norm": 0.43317630887031555, "learning_rate": 6.500924995969582e-05, "loss": 2.8552, "step": 5280 }, { "epoch": 0.43306174737381503, "grad_norm": 0.41484972834587097, "learning_rate": 6.498338510171514e-05, "loss": 2.7769, "step": 5282 }, { "epoch": 0.4332257238022034, "grad_norm": 0.41651368141174316, "learning_rate": 6.495751583790526e-05, "loss": 2.8219, "step": 5284 }, { "epoch": 0.43338970023059187, "grad_norm": 0.4511995315551758, "learning_rate": 6.493164217587303e-05, "loss": 2.7643, "step": 5286 }, { "epoch": 0.43355367665898026, "grad_norm": 0.4024675488471985, "learning_rate": 6.490576412322652e-05, "loss": 2.7651, "step": 5288 }, { "epoch": 0.4337176530873687, "grad_norm": 0.411886066198349, "learning_rate": 6.48798816875751e-05, "loss": 2.8321, "step": 5290 }, { "epoch": 0.4338816295157571, "grad_norm": 0.43117591738700867, "learning_rate": 6.485399487652945e-05, "loss": 2.8002, "step": 5292 }, { "epoch": 0.43404560594414554, "grad_norm": 0.42103347182273865, "learning_rate": 6.48281036977015e-05, "loss": 2.8772, "step": 5294 }, { "epoch": 0.43420958237253393, "grad_norm": 0.4188506305217743, "learning_rate": 6.480220815870453e-05, "loss": 2.8489, "step": 5296 }, { "epoch": 0.4343735588009224, "grad_norm": 0.42243778705596924, "learning_rate": 6.477630826715305e-05, "loss": 2.8632, "step": 5298 }, { "epoch": 0.43453753522931077, "grad_norm": 0.45281627774238586, "learning_rate": 6.475040403066284e-05, "loss": 2.8428, "step": 5300 }, { "epoch": 0.4347015116576992, "grad_norm": 0.43130266666412354, "learning_rate": 6.472449545685099e-05, "loss": 2.8076, "step": 5302 }, { "epoch": 0.4348654880860876, "grad_norm": 0.4273144006729126, "learning_rate": 6.469858255333588e-05, "loss": 2.7982, "step": 5304 }, { "epoch": 0.43502946451447605, "grad_norm": 0.43439432978630066, "learning_rate": 6.46726653277371e-05, "loss": 2.8237, "step": 5306 }, { "epoch": 0.43519344094286444, "grad_norm": 0.42046454548835754, "learning_rate": 6.464674378767558e-05, "loss": 2.8721, "step": 5308 }, { "epoch": 0.4353574173712529, "grad_norm": 0.4046113193035126, "learning_rate": 6.462081794077348e-05, "loss": 2.8418, "step": 5310 }, { "epoch": 0.4355213937996413, "grad_norm": 0.3967956006526947, "learning_rate": 6.459488779465424e-05, "loss": 2.8118, "step": 5312 }, { "epoch": 0.4356853702280297, "grad_norm": 0.3960517644882202, "learning_rate": 6.456895335694253e-05, "loss": 2.8551, "step": 5314 }, { "epoch": 0.4358493466564181, "grad_norm": 0.3832727372646332, "learning_rate": 6.454301463526434e-05, "loss": 2.8441, "step": 5316 }, { "epoch": 0.43601332308480656, "grad_norm": 0.3792496621608734, "learning_rate": 6.451707163724687e-05, "loss": 2.8411, "step": 5318 }, { "epoch": 0.436177299513195, "grad_norm": 0.39607372879981995, "learning_rate": 6.449112437051862e-05, "loss": 2.8356, "step": 5320 }, { "epoch": 0.4363412759415834, "grad_norm": 0.4213770031929016, "learning_rate": 6.446517284270932e-05, "loss": 2.8326, "step": 5322 }, { "epoch": 0.43650525236997184, "grad_norm": 0.43514689803123474, "learning_rate": 6.443921706144992e-05, "loss": 2.8283, "step": 5324 }, { "epoch": 0.43666922879836023, "grad_norm": 0.4403941333293915, "learning_rate": 6.441325703437269e-05, "loss": 2.826, "step": 5326 }, { "epoch": 0.4368332052267487, "grad_norm": 0.4238094091415405, "learning_rate": 6.438729276911112e-05, "loss": 2.8394, "step": 5328 }, { "epoch": 0.43699718165513707, "grad_norm": 0.4089421033859253, "learning_rate": 6.436132427329992e-05, "loss": 2.8331, "step": 5330 }, { "epoch": 0.4371611580835255, "grad_norm": 0.4226435720920563, "learning_rate": 6.433535155457508e-05, "loss": 2.8852, "step": 5332 }, { "epoch": 0.4373251345119139, "grad_norm": 0.4296805262565613, "learning_rate": 6.43093746205738e-05, "loss": 2.8884, "step": 5334 }, { "epoch": 0.43748911094030235, "grad_norm": 0.449306845664978, "learning_rate": 6.428339347893456e-05, "loss": 2.8108, "step": 5336 }, { "epoch": 0.43765308736869074, "grad_norm": 0.4620767831802368, "learning_rate": 6.425740813729704e-05, "loss": 2.8515, "step": 5338 }, { "epoch": 0.4378170637970792, "grad_norm": 0.41048452258110046, "learning_rate": 6.423141860330216e-05, "loss": 2.8679, "step": 5340 }, { "epoch": 0.4379810402254676, "grad_norm": 0.412546306848526, "learning_rate": 6.42054248845921e-05, "loss": 2.8475, "step": 5342 }, { "epoch": 0.438145016653856, "grad_norm": 0.42965230345726013, "learning_rate": 6.417942698881023e-05, "loss": 2.793, "step": 5344 }, { "epoch": 0.4383089930822444, "grad_norm": 0.4410039484500885, "learning_rate": 6.41534249236012e-05, "loss": 2.8357, "step": 5346 }, { "epoch": 0.43847296951063286, "grad_norm": 0.4114231467247009, "learning_rate": 6.412741869661082e-05, "loss": 2.8695, "step": 5348 }, { "epoch": 0.43863694593902125, "grad_norm": 0.4227801561355591, "learning_rate": 6.410140831548619e-05, "loss": 2.8338, "step": 5350 }, { "epoch": 0.4388009223674097, "grad_norm": 0.3871307373046875, "learning_rate": 6.40753937878756e-05, "loss": 2.8302, "step": 5352 }, { "epoch": 0.4389648987957981, "grad_norm": 0.3907979130744934, "learning_rate": 6.404937512142852e-05, "loss": 2.8185, "step": 5354 }, { "epoch": 0.43912887522418653, "grad_norm": 0.4412355422973633, "learning_rate": 6.402335232379576e-05, "loss": 2.8504, "step": 5356 }, { "epoch": 0.4392928516525749, "grad_norm": 0.450156033039093, "learning_rate": 6.399732540262916e-05, "loss": 2.8437, "step": 5358 }, { "epoch": 0.43945682808096337, "grad_norm": 0.465420663356781, "learning_rate": 6.397129436558196e-05, "loss": 2.836, "step": 5360 }, { "epoch": 0.43962080450935176, "grad_norm": 0.4449327886104584, "learning_rate": 6.394525922030848e-05, "loss": 2.7505, "step": 5362 }, { "epoch": 0.4397847809377402, "grad_norm": 0.41881614923477173, "learning_rate": 6.391921997446431e-05, "loss": 2.8624, "step": 5364 }, { "epoch": 0.4399487573661286, "grad_norm": 0.4254155158996582, "learning_rate": 6.38931766357062e-05, "loss": 2.8722, "step": 5366 }, { "epoch": 0.44011273379451704, "grad_norm": 0.40859904885292053, "learning_rate": 6.386712921169218e-05, "loss": 2.8448, "step": 5368 }, { "epoch": 0.44027671022290543, "grad_norm": 0.4507814645767212, "learning_rate": 6.384107771008141e-05, "loss": 2.8432, "step": 5370 }, { "epoch": 0.4404406866512939, "grad_norm": 0.4630066454410553, "learning_rate": 6.381502213853425e-05, "loss": 2.868, "step": 5372 }, { "epoch": 0.44060466307968227, "grad_norm": 0.43618708848953247, "learning_rate": 6.378896250471232e-05, "loss": 2.7984, "step": 5374 }, { "epoch": 0.4407686395080707, "grad_norm": 0.41137388348579407, "learning_rate": 6.37628988162784e-05, "loss": 2.8467, "step": 5376 }, { "epoch": 0.44093261593645916, "grad_norm": 0.4223901927471161, "learning_rate": 6.373683108089639e-05, "loss": 2.7786, "step": 5378 }, { "epoch": 0.44109659236484755, "grad_norm": 0.46738335490226746, "learning_rate": 6.371075930623151e-05, "loss": 2.8053, "step": 5380 }, { "epoch": 0.441260568793236, "grad_norm": 0.5173202753067017, "learning_rate": 6.368468349995009e-05, "loss": 2.852, "step": 5382 }, { "epoch": 0.4414245452216244, "grad_norm": 0.4798794090747833, "learning_rate": 6.365860366971965e-05, "loss": 2.8693, "step": 5384 }, { "epoch": 0.44158852165001283, "grad_norm": 0.4778260588645935, "learning_rate": 6.363251982320891e-05, "loss": 2.8151, "step": 5386 }, { "epoch": 0.4417524980784012, "grad_norm": 0.4863475561141968, "learning_rate": 6.360643196808774e-05, "loss": 2.8251, "step": 5388 }, { "epoch": 0.44191647450678967, "grad_norm": 0.4321562647819519, "learning_rate": 6.358034011202724e-05, "loss": 2.7902, "step": 5390 }, { "epoch": 0.44208045093517806, "grad_norm": 0.43023213744163513, "learning_rate": 6.355424426269965e-05, "loss": 2.8054, "step": 5392 }, { "epoch": 0.4422444273635665, "grad_norm": 0.4235995411872864, "learning_rate": 6.352814442777842e-05, "loss": 2.8321, "step": 5394 }, { "epoch": 0.4424084037919549, "grad_norm": 0.43662554025650024, "learning_rate": 6.350204061493808e-05, "loss": 2.8302, "step": 5396 }, { "epoch": 0.44257238022034334, "grad_norm": 0.4400186538696289, "learning_rate": 6.347593283185444e-05, "loss": 2.8407, "step": 5398 }, { "epoch": 0.44273635664873173, "grad_norm": 0.4381493926048279, "learning_rate": 6.344982108620445e-05, "loss": 2.8063, "step": 5400 }, { "epoch": 0.4429003330771202, "grad_norm": 0.41799694299697876, "learning_rate": 6.342370538566617e-05, "loss": 2.796, "step": 5402 }, { "epoch": 0.44306430950550857, "grad_norm": 0.4099697172641754, "learning_rate": 6.339758573791888e-05, "loss": 2.8589, "step": 5404 }, { "epoch": 0.443228285933897, "grad_norm": 0.42744383215904236, "learning_rate": 6.337146215064298e-05, "loss": 2.9065, "step": 5406 }, { "epoch": 0.4433922623622854, "grad_norm": 0.43363282084465027, "learning_rate": 6.334533463152008e-05, "loss": 2.846, "step": 5408 }, { "epoch": 0.44355623879067385, "grad_norm": 0.46505486965179443, "learning_rate": 6.33192031882329e-05, "loss": 2.8277, "step": 5410 }, { "epoch": 0.44372021521906224, "grad_norm": 0.4066617488861084, "learning_rate": 6.329306782846532e-05, "loss": 2.8879, "step": 5412 }, { "epoch": 0.4438841916474507, "grad_norm": 0.4458906054496765, "learning_rate": 6.326692855990239e-05, "loss": 2.861, "step": 5414 }, { "epoch": 0.4440481680758391, "grad_norm": 0.45811498165130615, "learning_rate": 6.32407853902303e-05, "loss": 2.8391, "step": 5416 }, { "epoch": 0.4442121445042275, "grad_norm": 0.42726707458496094, "learning_rate": 6.32146383271364e-05, "loss": 2.7785, "step": 5418 }, { "epoch": 0.4443761209326159, "grad_norm": 0.3991510570049286, "learning_rate": 6.318848737830916e-05, "loss": 2.8428, "step": 5420 }, { "epoch": 0.44454009736100436, "grad_norm": 0.4369128942489624, "learning_rate": 6.31623325514382e-05, "loss": 2.8468, "step": 5422 }, { "epoch": 0.44470407378939275, "grad_norm": 0.44972068071365356, "learning_rate": 6.31361738542143e-05, "loss": 2.8408, "step": 5424 }, { "epoch": 0.4448680502177812, "grad_norm": 0.4460034668445587, "learning_rate": 6.311001129432936e-05, "loss": 2.8325, "step": 5426 }, { "epoch": 0.4450320266461696, "grad_norm": 0.4248029589653015, "learning_rate": 6.308384487947639e-05, "loss": 2.8542, "step": 5428 }, { "epoch": 0.44519600307455803, "grad_norm": 0.42048823833465576, "learning_rate": 6.30576746173496e-05, "loss": 2.8299, "step": 5430 }, { "epoch": 0.4453599795029465, "grad_norm": 0.46963199973106384, "learning_rate": 6.30315005156443e-05, "loss": 2.8312, "step": 5432 }, { "epoch": 0.44552395593133487, "grad_norm": 0.44268596172332764, "learning_rate": 6.300532258205688e-05, "loss": 2.7943, "step": 5434 }, { "epoch": 0.4456879323597233, "grad_norm": 0.4529852867126465, "learning_rate": 6.297914082428491e-05, "loss": 2.801, "step": 5436 }, { "epoch": 0.4458519087881117, "grad_norm": 0.44438987970352173, "learning_rate": 6.295295525002713e-05, "loss": 2.8337, "step": 5438 }, { "epoch": 0.44601588521650015, "grad_norm": 0.48741820454597473, "learning_rate": 6.292676586698328e-05, "loss": 2.8151, "step": 5440 }, { "epoch": 0.44617986164488854, "grad_norm": 0.45658358931541443, "learning_rate": 6.29005726828543e-05, "loss": 2.848, "step": 5442 }, { "epoch": 0.446343838073277, "grad_norm": 0.4459148049354553, "learning_rate": 6.287437570534227e-05, "loss": 2.8042, "step": 5444 }, { "epoch": 0.4465078145016654, "grad_norm": 0.47021788358688354, "learning_rate": 6.28481749421503e-05, "loss": 2.8058, "step": 5446 }, { "epoch": 0.4466717909300538, "grad_norm": 0.4658445119857788, "learning_rate": 6.282197040098273e-05, "loss": 2.8652, "step": 5448 }, { "epoch": 0.4468357673584422, "grad_norm": 0.45639750361442566, "learning_rate": 6.279576208954487e-05, "loss": 2.8018, "step": 5450 }, { "epoch": 0.44699974378683066, "grad_norm": 0.45397526025772095, "learning_rate": 6.276955001554324e-05, "loss": 2.805, "step": 5452 }, { "epoch": 0.44716372021521905, "grad_norm": 0.4116973578929901, "learning_rate": 6.274333418668545e-05, "loss": 2.7981, "step": 5454 }, { "epoch": 0.4473276966436075, "grad_norm": 0.41127046942710876, "learning_rate": 6.27171146106802e-05, "loss": 2.8525, "step": 5456 }, { "epoch": 0.4474916730719959, "grad_norm": 0.4021296203136444, "learning_rate": 6.269089129523729e-05, "loss": 2.8321, "step": 5458 }, { "epoch": 0.44765564950038433, "grad_norm": 0.41415926814079285, "learning_rate": 6.266466424806762e-05, "loss": 2.8192, "step": 5460 }, { "epoch": 0.4478196259287727, "grad_norm": 0.4048265814781189, "learning_rate": 6.26384334768832e-05, "loss": 2.7977, "step": 5462 }, { "epoch": 0.44798360235716117, "grad_norm": 0.4210762083530426, "learning_rate": 6.261219898939712e-05, "loss": 2.8261, "step": 5464 }, { "epoch": 0.44814757878554956, "grad_norm": 0.43128538131713867, "learning_rate": 6.258596079332357e-05, "loss": 2.8386, "step": 5466 }, { "epoch": 0.448311555213938, "grad_norm": 0.42629754543304443, "learning_rate": 6.255971889637785e-05, "loss": 2.8455, "step": 5468 }, { "epoch": 0.4484755316423264, "grad_norm": 0.4861750304698944, "learning_rate": 6.25334733062763e-05, "loss": 2.8276, "step": 5470 }, { "epoch": 0.44863950807071484, "grad_norm": 0.4479605555534363, "learning_rate": 6.250722403073639e-05, "loss": 2.7939, "step": 5472 }, { "epoch": 0.44880348449910323, "grad_norm": 0.4483131170272827, "learning_rate": 6.248097107747665e-05, "loss": 2.8228, "step": 5474 }, { "epoch": 0.4489674609274917, "grad_norm": 0.48584070801734924, "learning_rate": 6.245471445421669e-05, "loss": 2.8331, "step": 5476 }, { "epoch": 0.44913143735588007, "grad_norm": 0.47229447960853577, "learning_rate": 6.242845416867721e-05, "loss": 2.7656, "step": 5478 }, { "epoch": 0.4492954137842685, "grad_norm": 0.45715320110321045, "learning_rate": 6.240219022858003e-05, "loss": 2.7746, "step": 5480 }, { "epoch": 0.4494593902126569, "grad_norm": 0.40935462713241577, "learning_rate": 6.237592264164794e-05, "loss": 2.7756, "step": 5482 }, { "epoch": 0.44962336664104535, "grad_norm": 0.42745450139045715, "learning_rate": 6.234965141560488e-05, "loss": 2.8465, "step": 5484 }, { "epoch": 0.44978734306943374, "grad_norm": 0.42606422305107117, "learning_rate": 6.232337655817587e-05, "loss": 2.8248, "step": 5486 }, { "epoch": 0.4499513194978222, "grad_norm": 0.455293744802475, "learning_rate": 6.229709807708694e-05, "loss": 2.8302, "step": 5488 }, { "epoch": 0.45011529592621063, "grad_norm": 0.43568676710128784, "learning_rate": 6.227081598006523e-05, "loss": 2.8069, "step": 5490 }, { "epoch": 0.450279272354599, "grad_norm": 0.43974193930625916, "learning_rate": 6.224453027483891e-05, "loss": 2.8403, "step": 5492 }, { "epoch": 0.45044324878298747, "grad_norm": 0.42426374554634094, "learning_rate": 6.221824096913727e-05, "loss": 2.8064, "step": 5494 }, { "epoch": 0.45060722521137586, "grad_norm": 0.4468149244785309, "learning_rate": 6.219194807069057e-05, "loss": 2.8305, "step": 5496 }, { "epoch": 0.4507712016397643, "grad_norm": 0.4190889596939087, "learning_rate": 6.216565158723022e-05, "loss": 2.8208, "step": 5498 }, { "epoch": 0.4509351780681527, "grad_norm": 0.4246452748775482, "learning_rate": 6.21393515264886e-05, "loss": 2.8203, "step": 5500 }, { "epoch": 0.45109915449654114, "grad_norm": 0.45627468824386597, "learning_rate": 6.211304789619918e-05, "loss": 2.8507, "step": 5502 }, { "epoch": 0.45126313092492953, "grad_norm": 0.45805448293685913, "learning_rate": 6.208674070409653e-05, "loss": 2.8456, "step": 5504 }, { "epoch": 0.451427107353318, "grad_norm": 0.4606642425060272, "learning_rate": 6.206042995791617e-05, "loss": 2.8356, "step": 5506 }, { "epoch": 0.45159108378170637, "grad_norm": 0.5084508657455444, "learning_rate": 6.203411566539472e-05, "loss": 2.7843, "step": 5508 }, { "epoch": 0.4517550602100948, "grad_norm": 0.4745926260948181, "learning_rate": 6.200779783426986e-05, "loss": 2.8339, "step": 5510 }, { "epoch": 0.4519190366384832, "grad_norm": 0.4591147303581238, "learning_rate": 6.198147647228027e-05, "loss": 2.8131, "step": 5512 }, { "epoch": 0.45208301306687165, "grad_norm": 0.42851564288139343, "learning_rate": 6.195515158716567e-05, "loss": 2.8114, "step": 5514 }, { "epoch": 0.45224698949526004, "grad_norm": 0.42879414558410645, "learning_rate": 6.192882318666687e-05, "loss": 2.784, "step": 5516 }, { "epoch": 0.4524109659236485, "grad_norm": 0.4082982838153839, "learning_rate": 6.190249127852565e-05, "loss": 2.8368, "step": 5518 }, { "epoch": 0.4525749423520369, "grad_norm": 0.4085899889469147, "learning_rate": 6.187615587048483e-05, "loss": 2.8495, "step": 5520 }, { "epoch": 0.4527389187804253, "grad_norm": 0.43112874031066895, "learning_rate": 6.18498169702883e-05, "loss": 2.7851, "step": 5522 }, { "epoch": 0.4529028952088137, "grad_norm": 0.438242644071579, "learning_rate": 6.182347458568096e-05, "loss": 2.8398, "step": 5524 }, { "epoch": 0.45306687163720216, "grad_norm": 0.48834484815597534, "learning_rate": 6.179712872440869e-05, "loss": 2.8107, "step": 5526 }, { "epoch": 0.45323084806559055, "grad_norm": 0.5261332988739014, "learning_rate": 6.177077939421845e-05, "loss": 2.8842, "step": 5528 }, { "epoch": 0.453394824493979, "grad_norm": 0.4741848409175873, "learning_rate": 6.174442660285818e-05, "loss": 2.8348, "step": 5530 }, { "epoch": 0.4535588009223674, "grad_norm": 0.43609732389450073, "learning_rate": 6.171807035807689e-05, "loss": 2.7855, "step": 5532 }, { "epoch": 0.45372277735075583, "grad_norm": 0.4651409089565277, "learning_rate": 6.169171066762456e-05, "loss": 2.8238, "step": 5534 }, { "epoch": 0.4538867537791442, "grad_norm": 0.4825083017349243, "learning_rate": 6.166534753925218e-05, "loss": 2.8005, "step": 5536 }, { "epoch": 0.45405073020753267, "grad_norm": 0.4591177701950073, "learning_rate": 6.163898098071178e-05, "loss": 2.8426, "step": 5538 }, { "epoch": 0.45421470663592106, "grad_norm": 0.4679067134857178, "learning_rate": 6.161261099975638e-05, "loss": 2.8282, "step": 5540 }, { "epoch": 0.4543786830643095, "grad_norm": 0.4355182647705078, "learning_rate": 6.158623760414002e-05, "loss": 2.7863, "step": 5542 }, { "epoch": 0.4545426594926979, "grad_norm": 0.43111327290534973, "learning_rate": 6.155986080161771e-05, "loss": 2.8526, "step": 5544 }, { "epoch": 0.45470663592108634, "grad_norm": 0.4271221160888672, "learning_rate": 6.153348059994551e-05, "loss": 2.8234, "step": 5546 }, { "epoch": 0.4548706123494748, "grad_norm": 0.4791860580444336, "learning_rate": 6.150709700688045e-05, "loss": 2.8127, "step": 5548 }, { "epoch": 0.4550345887778632, "grad_norm": 0.4541807174682617, "learning_rate": 6.148071003018055e-05, "loss": 2.8017, "step": 5550 }, { "epoch": 0.4551985652062516, "grad_norm": 0.43343642354011536, "learning_rate": 6.145431967760487e-05, "loss": 2.8638, "step": 5552 }, { "epoch": 0.45536254163464, "grad_norm": 0.42656823992729187, "learning_rate": 6.142792595691342e-05, "loss": 2.7703, "step": 5554 }, { "epoch": 0.45552651806302846, "grad_norm": 0.4463464915752411, "learning_rate": 6.140152887586718e-05, "loss": 2.8374, "step": 5556 }, { "epoch": 0.45569049449141685, "grad_norm": 0.4074001610279083, "learning_rate": 6.137512844222818e-05, "loss": 2.8707, "step": 5558 }, { "epoch": 0.4558544709198053, "grad_norm": 0.43333056569099426, "learning_rate": 6.13487246637594e-05, "loss": 2.8052, "step": 5560 }, { "epoch": 0.4560184473481937, "grad_norm": 0.4388822317123413, "learning_rate": 6.132231754822482e-05, "loss": 2.8677, "step": 5562 }, { "epoch": 0.45618242377658214, "grad_norm": 0.4309976100921631, "learning_rate": 6.129590710338937e-05, "loss": 2.8462, "step": 5564 }, { "epoch": 0.4563464002049705, "grad_norm": 0.45867303013801575, "learning_rate": 6.1269493337019e-05, "loss": 2.8331, "step": 5566 }, { "epoch": 0.45651037663335897, "grad_norm": 0.4165544807910919, "learning_rate": 6.124307625688057e-05, "loss": 2.8549, "step": 5568 }, { "epoch": 0.45667435306174736, "grad_norm": 0.3921827971935272, "learning_rate": 6.121665587074203e-05, "loss": 2.7879, "step": 5570 }, { "epoch": 0.4568383294901358, "grad_norm": 0.4092395305633545, "learning_rate": 6.119023218637217e-05, "loss": 2.7812, "step": 5572 }, { "epoch": 0.4570023059185242, "grad_norm": 0.41083306074142456, "learning_rate": 6.116380521154083e-05, "loss": 2.8403, "step": 5574 }, { "epoch": 0.45716628234691264, "grad_norm": 0.4270792305469513, "learning_rate": 6.113737495401885e-05, "loss": 2.7678, "step": 5576 }, { "epoch": 0.45733025877530104, "grad_norm": 0.4373821020126343, "learning_rate": 6.11109414215779e-05, "loss": 2.8208, "step": 5578 }, { "epoch": 0.4574942352036895, "grad_norm": 0.4400310218334198, "learning_rate": 6.108450462199077e-05, "loss": 2.8128, "step": 5580 }, { "epoch": 0.45765821163207787, "grad_norm": 0.47952520847320557, "learning_rate": 6.10580645630311e-05, "loss": 2.8685, "step": 5582 }, { "epoch": 0.4578221880604663, "grad_norm": 0.4763694703578949, "learning_rate": 6.1031621252473536e-05, "loss": 2.7871, "step": 5584 }, { "epoch": 0.4579861644888547, "grad_norm": 0.4709533154964447, "learning_rate": 6.100517469809368e-05, "loss": 2.7995, "step": 5586 }, { "epoch": 0.45815014091724315, "grad_norm": 0.4494019150733948, "learning_rate": 6.097872490766807e-05, "loss": 2.7969, "step": 5588 }, { "epoch": 0.45831411734563154, "grad_norm": 0.512210488319397, "learning_rate": 6.0952271888974214e-05, "loss": 2.8279, "step": 5590 }, { "epoch": 0.45847809377402, "grad_norm": 0.4916641116142273, "learning_rate": 6.092581564979053e-05, "loss": 2.8165, "step": 5592 }, { "epoch": 0.4586420702024084, "grad_norm": 0.4593188762664795, "learning_rate": 6.089935619789646e-05, "loss": 2.8759, "step": 5594 }, { "epoch": 0.4588060466307968, "grad_norm": 0.4758460223674774, "learning_rate": 6.087289354107229e-05, "loss": 2.7869, "step": 5596 }, { "epoch": 0.4589700230591852, "grad_norm": 0.4455423653125763, "learning_rate": 6.084642768709935e-05, "loss": 2.7912, "step": 5598 }, { "epoch": 0.45913399948757366, "grad_norm": 0.46677160263061523, "learning_rate": 6.0819958643759855e-05, "loss": 2.805, "step": 5600 }, { "epoch": 0.45929797591596205, "grad_norm": 0.44516557455062866, "learning_rate": 6.079348641883693e-05, "loss": 2.8097, "step": 5602 }, { "epoch": 0.4594619523443505, "grad_norm": 0.43633347749710083, "learning_rate": 6.076701102011471e-05, "loss": 2.8249, "step": 5604 }, { "epoch": 0.45962592877273895, "grad_norm": 0.39614197611808777, "learning_rate": 6.0740532455378194e-05, "loss": 2.7605, "step": 5606 }, { "epoch": 0.45978990520112734, "grad_norm": 0.4021206498146057, "learning_rate": 6.0714050732413376e-05, "loss": 2.8536, "step": 5608 }, { "epoch": 0.4599538816295158, "grad_norm": 0.4307616651058197, "learning_rate": 6.06875658590071e-05, "loss": 2.8353, "step": 5610 }, { "epoch": 0.46011785805790417, "grad_norm": 0.4669673442840576, "learning_rate": 6.066107784294723e-05, "loss": 2.8005, "step": 5612 }, { "epoch": 0.4602818344862926, "grad_norm": 0.4729499816894531, "learning_rate": 6.0634586692022454e-05, "loss": 2.846, "step": 5614 }, { "epoch": 0.460445810914681, "grad_norm": 0.486903578042984, "learning_rate": 6.0608092414022466e-05, "loss": 2.8256, "step": 5616 }, { "epoch": 0.46060978734306945, "grad_norm": 0.4671494960784912, "learning_rate": 6.058159501673785e-05, "loss": 2.7911, "step": 5618 }, { "epoch": 0.46077376377145784, "grad_norm": 0.49648165702819824, "learning_rate": 6.055509450796008e-05, "loss": 2.783, "step": 5620 }, { "epoch": 0.4609377401998463, "grad_norm": 0.5165703296661377, "learning_rate": 6.052859089548157e-05, "loss": 2.8046, "step": 5622 }, { "epoch": 0.4611017166282347, "grad_norm": 0.4854130148887634, "learning_rate": 6.0502084187095674e-05, "loss": 2.825, "step": 5624 }, { "epoch": 0.4612656930566231, "grad_norm": 0.5459082722663879, "learning_rate": 6.04755743905966e-05, "loss": 2.8237, "step": 5626 }, { "epoch": 0.4614296694850115, "grad_norm": 0.47205623984336853, "learning_rate": 6.0449061513779507e-05, "loss": 2.8149, "step": 5628 }, { "epoch": 0.46159364591339996, "grad_norm": 0.45652610063552856, "learning_rate": 6.0422545564440424e-05, "loss": 2.8339, "step": 5630 }, { "epoch": 0.46175762234178835, "grad_norm": 0.47060245275497437, "learning_rate": 6.039602655037634e-05, "loss": 2.7832, "step": 5632 }, { "epoch": 0.4619215987701768, "grad_norm": 0.437139630317688, "learning_rate": 6.0369504479385055e-05, "loss": 2.7565, "step": 5634 }, { "epoch": 0.4620855751985652, "grad_norm": 0.4424857497215271, "learning_rate": 6.034297935926537e-05, "loss": 2.7699, "step": 5636 }, { "epoch": 0.46224955162695364, "grad_norm": 0.41876840591430664, "learning_rate": 6.0316451197816905e-05, "loss": 2.7735, "step": 5638 }, { "epoch": 0.462413528055342, "grad_norm": 0.608383059501648, "learning_rate": 6.028992000284022e-05, "loss": 2.8363, "step": 5640 }, { "epoch": 0.4625775044837305, "grad_norm": 0.43988195061683655, "learning_rate": 6.026338578213675e-05, "loss": 2.7752, "step": 5642 }, { "epoch": 0.46274148091211886, "grad_norm": 0.39045771956443787, "learning_rate": 6.0236848543508804e-05, "loss": 2.7811, "step": 5644 }, { "epoch": 0.4629054573405073, "grad_norm": 0.4120277464389801, "learning_rate": 6.021030829475961e-05, "loss": 2.8247, "step": 5646 }, { "epoch": 0.4630694337688957, "grad_norm": 0.4452279508113861, "learning_rate": 6.018376504369326e-05, "loss": 2.8105, "step": 5648 }, { "epoch": 0.46323341019728415, "grad_norm": 0.43291357159614563, "learning_rate": 6.015721879811473e-05, "loss": 2.8537, "step": 5650 }, { "epoch": 0.46339738662567254, "grad_norm": 0.44539210200309753, "learning_rate": 6.01306695658299e-05, "loss": 2.7894, "step": 5652 }, { "epoch": 0.463561363054061, "grad_norm": 0.4763514995574951, "learning_rate": 6.01041173546455e-05, "loss": 2.8683, "step": 5654 }, { "epoch": 0.4637253394824494, "grad_norm": 0.4426315426826477, "learning_rate": 6.0077562172369136e-05, "loss": 2.7684, "step": 5656 }, { "epoch": 0.4638893159108378, "grad_norm": 0.43244004249572754, "learning_rate": 6.00510040268093e-05, "loss": 2.7787, "step": 5658 }, { "epoch": 0.4640532923392262, "grad_norm": 0.4351418614387512, "learning_rate": 6.002444292577536e-05, "loss": 2.8063, "step": 5660 }, { "epoch": 0.46421726876761465, "grad_norm": 0.4126474857330322, "learning_rate": 5.999787887707753e-05, "loss": 2.8737, "step": 5662 }, { "epoch": 0.4643812451960031, "grad_norm": 0.4187913239002228, "learning_rate": 5.997131188852691e-05, "loss": 2.8078, "step": 5664 }, { "epoch": 0.4645452216243915, "grad_norm": 0.4025074541568756, "learning_rate": 5.994474196793549e-05, "loss": 2.8085, "step": 5666 }, { "epoch": 0.46470919805277994, "grad_norm": 0.4166587293148041, "learning_rate": 5.991816912311606e-05, "loss": 2.7544, "step": 5668 }, { "epoch": 0.4648731744811683, "grad_norm": 0.4639766216278076, "learning_rate": 5.9891593361882306e-05, "loss": 2.8213, "step": 5670 }, { "epoch": 0.4650371509095568, "grad_norm": 0.4072076082229614, "learning_rate": 5.986501469204878e-05, "loss": 2.8197, "step": 5672 }, { "epoch": 0.46520112733794516, "grad_norm": 0.4271763563156128, "learning_rate": 5.983843312143087e-05, "loss": 2.8048, "step": 5674 }, { "epoch": 0.4653651037663336, "grad_norm": 0.4413955807685852, "learning_rate": 5.981184865784484e-05, "loss": 2.7811, "step": 5676 }, { "epoch": 0.465529080194722, "grad_norm": 0.49118563532829285, "learning_rate": 5.978526130910775e-05, "loss": 2.804, "step": 5678 }, { "epoch": 0.46569305662311045, "grad_norm": 0.48541152477264404, "learning_rate": 5.9758671083037596e-05, "loss": 2.8017, "step": 5680 }, { "epoch": 0.46585703305149884, "grad_norm": 0.43740737438201904, "learning_rate": 5.973207798745313e-05, "loss": 2.8365, "step": 5682 }, { "epoch": 0.4660210094798873, "grad_norm": 0.43544474244117737, "learning_rate": 5.970548203017402e-05, "loss": 2.8059, "step": 5684 }, { "epoch": 0.4661849859082757, "grad_norm": 0.4222916066646576, "learning_rate": 5.967888321902072e-05, "loss": 2.7608, "step": 5686 }, { "epoch": 0.4663489623366641, "grad_norm": 0.3928931653499603, "learning_rate": 5.965228156181457e-05, "loss": 2.8234, "step": 5688 }, { "epoch": 0.4665129387650525, "grad_norm": 0.4149591624736786, "learning_rate": 5.9625677066377714e-05, "loss": 2.8284, "step": 5690 }, { "epoch": 0.46667691519344096, "grad_norm": 0.4028678238391876, "learning_rate": 5.959906974053313e-05, "loss": 2.7957, "step": 5692 }, { "epoch": 0.46684089162182935, "grad_norm": 0.409410685300827, "learning_rate": 5.9572459592104654e-05, "loss": 2.7978, "step": 5694 }, { "epoch": 0.4670048680502178, "grad_norm": 0.44319820404052734, "learning_rate": 5.9545846628916957e-05, "loss": 2.7922, "step": 5696 }, { "epoch": 0.4671688444786062, "grad_norm": 0.4542018473148346, "learning_rate": 5.951923085879547e-05, "loss": 2.7683, "step": 5698 }, { "epoch": 0.46733282090699463, "grad_norm": 0.4291388988494873, "learning_rate": 5.949261228956654e-05, "loss": 2.7518, "step": 5700 }, { "epoch": 0.467496797335383, "grad_norm": 0.449232816696167, "learning_rate": 5.946599092905728e-05, "loss": 2.8088, "step": 5702 }, { "epoch": 0.46766077376377146, "grad_norm": 0.4024021625518799, "learning_rate": 5.943936678509563e-05, "loss": 2.7663, "step": 5704 }, { "epoch": 0.46782475019215986, "grad_norm": 0.41187432408332825, "learning_rate": 5.9412739865510356e-05, "loss": 2.7777, "step": 5706 }, { "epoch": 0.4679887266205483, "grad_norm": 0.42438969016075134, "learning_rate": 5.9386110178131074e-05, "loss": 2.8298, "step": 5708 }, { "epoch": 0.4681527030489367, "grad_norm": 0.4174380600452423, "learning_rate": 5.9359477730788135e-05, "loss": 2.758, "step": 5710 }, { "epoch": 0.46831667947732514, "grad_norm": 0.4812970757484436, "learning_rate": 5.933284253131277e-05, "loss": 2.808, "step": 5712 }, { "epoch": 0.46848065590571353, "grad_norm": 0.48399415612220764, "learning_rate": 5.930620458753701e-05, "loss": 2.7885, "step": 5714 }, { "epoch": 0.468644632334102, "grad_norm": 0.5062437653541565, "learning_rate": 5.927956390729364e-05, "loss": 2.8157, "step": 5716 }, { "epoch": 0.4688086087624904, "grad_norm": 0.46985939145088196, "learning_rate": 5.925292049841633e-05, "loss": 2.7961, "step": 5718 }, { "epoch": 0.4689725851908788, "grad_norm": 0.456906795501709, "learning_rate": 5.922627436873951e-05, "loss": 2.8435, "step": 5720 }, { "epoch": 0.46913656161926726, "grad_norm": 0.47446414828300476, "learning_rate": 5.919962552609838e-05, "loss": 2.8044, "step": 5722 }, { "epoch": 0.46930053804765565, "grad_norm": 0.4095275104045868, "learning_rate": 5.9172973978328994e-05, "loss": 2.731, "step": 5724 }, { "epoch": 0.4694645144760441, "grad_norm": 0.38491547107696533, "learning_rate": 5.914631973326819e-05, "loss": 2.7328, "step": 5726 }, { "epoch": 0.4696284909044325, "grad_norm": 0.40289902687072754, "learning_rate": 5.911966279875354e-05, "loss": 2.749, "step": 5728 }, { "epoch": 0.46979246733282093, "grad_norm": 0.42473292350769043, "learning_rate": 5.9093003182623474e-05, "loss": 2.8337, "step": 5730 }, { "epoch": 0.4699564437612093, "grad_norm": 0.4326395094394684, "learning_rate": 5.906634089271722e-05, "loss": 2.8016, "step": 5732 }, { "epoch": 0.47012042018959777, "grad_norm": 0.4024275541305542, "learning_rate": 5.903967593687472e-05, "loss": 2.7876, "step": 5734 }, { "epoch": 0.47028439661798616, "grad_norm": 0.44340696930885315, "learning_rate": 5.901300832293677e-05, "loss": 2.784, "step": 5736 }, { "epoch": 0.4704483730463746, "grad_norm": 0.4537631869316101, "learning_rate": 5.8986338058744905e-05, "loss": 2.8039, "step": 5738 }, { "epoch": 0.470612349474763, "grad_norm": 0.41661155223846436, "learning_rate": 5.895966515214145e-05, "loss": 2.8102, "step": 5740 }, { "epoch": 0.47077632590315144, "grad_norm": 0.4392305016517639, "learning_rate": 5.8932989610969516e-05, "loss": 2.8185, "step": 5742 }, { "epoch": 0.47094030233153983, "grad_norm": 0.46238356828689575, "learning_rate": 5.890631144307299e-05, "loss": 2.8385, "step": 5744 }, { "epoch": 0.4711042787599283, "grad_norm": 0.4088890254497528, "learning_rate": 5.887963065629652e-05, "loss": 2.836, "step": 5746 }, { "epoch": 0.47126825518831666, "grad_norm": 0.38375574350357056, "learning_rate": 5.8852947258485537e-05, "loss": 2.8207, "step": 5748 }, { "epoch": 0.4714322316167051, "grad_norm": 0.42347919940948486, "learning_rate": 5.88262612574862e-05, "loss": 2.7962, "step": 5750 }, { "epoch": 0.4715962080450935, "grad_norm": 0.4518338441848755, "learning_rate": 5.87995726611455e-05, "loss": 2.8055, "step": 5752 }, { "epoch": 0.47176018447348195, "grad_norm": 0.4793908894062042, "learning_rate": 5.877288147731114e-05, "loss": 2.841, "step": 5754 }, { "epoch": 0.47192416090187034, "grad_norm": 0.43137022852897644, "learning_rate": 5.87461877138316e-05, "loss": 2.8008, "step": 5756 }, { "epoch": 0.4720881373302588, "grad_norm": 0.40921229124069214, "learning_rate": 5.871949137855612e-05, "loss": 2.7741, "step": 5758 }, { "epoch": 0.4722521137586472, "grad_norm": 0.40755629539489746, "learning_rate": 5.869279247933469e-05, "loss": 2.8096, "step": 5760 }, { "epoch": 0.4724160901870356, "grad_norm": 0.3983427584171295, "learning_rate": 5.866609102401808e-05, "loss": 2.8176, "step": 5762 }, { "epoch": 0.472580066615424, "grad_norm": 0.3668491244316101, "learning_rate": 5.863938702045777e-05, "loss": 2.8583, "step": 5764 }, { "epoch": 0.47274404304381246, "grad_norm": 0.394542396068573, "learning_rate": 5.8612680476506e-05, "loss": 2.8073, "step": 5766 }, { "epoch": 0.47290801947220085, "grad_norm": 0.40968987345695496, "learning_rate": 5.8585971400015794e-05, "loss": 2.7961, "step": 5768 }, { "epoch": 0.4730719959005893, "grad_norm": 0.4503387212753296, "learning_rate": 5.855925979884088e-05, "loss": 2.7981, "step": 5770 }, { "epoch": 0.4732359723289777, "grad_norm": 0.4372981786727905, "learning_rate": 5.853254568083574e-05, "loss": 2.7914, "step": 5772 }, { "epoch": 0.47339994875736613, "grad_norm": 0.3964093029499054, "learning_rate": 5.85058290538556e-05, "loss": 2.7809, "step": 5774 }, { "epoch": 0.4735639251857546, "grad_norm": 0.43268218636512756, "learning_rate": 5.8479109925756405e-05, "loss": 2.8701, "step": 5776 }, { "epoch": 0.47372790161414297, "grad_norm": 0.3940499722957611, "learning_rate": 5.845238830439488e-05, "loss": 2.8071, "step": 5778 }, { "epoch": 0.4738918780425314, "grad_norm": 0.419542521238327, "learning_rate": 5.842566419762844e-05, "loss": 2.7558, "step": 5780 }, { "epoch": 0.4740558544709198, "grad_norm": 0.4414413571357727, "learning_rate": 5.839893761331524e-05, "loss": 2.8215, "step": 5782 }, { "epoch": 0.47421983089930825, "grad_norm": 0.44728782773017883, "learning_rate": 5.8372208559314177e-05, "loss": 2.7318, "step": 5784 }, { "epoch": 0.47438380732769664, "grad_norm": 0.43412888050079346, "learning_rate": 5.834547704348487e-05, "loss": 2.8587, "step": 5786 }, { "epoch": 0.4745477837560851, "grad_norm": 0.4343017637729645, "learning_rate": 5.831874307368766e-05, "loss": 2.7432, "step": 5788 }, { "epoch": 0.4747117601844735, "grad_norm": 0.4207670986652374, "learning_rate": 5.8292006657783595e-05, "loss": 2.8013, "step": 5790 }, { "epoch": 0.4748757366128619, "grad_norm": 0.3970639407634735, "learning_rate": 5.826526780363447e-05, "loss": 2.8342, "step": 5792 }, { "epoch": 0.4750397130412503, "grad_norm": 0.39141160249710083, "learning_rate": 5.823852651910278e-05, "loss": 2.8018, "step": 5794 }, { "epoch": 0.47520368946963876, "grad_norm": 0.4473559260368347, "learning_rate": 5.821178281205171e-05, "loss": 2.7463, "step": 5796 }, { "epoch": 0.47536766589802715, "grad_norm": 0.4763402044773102, "learning_rate": 5.8185036690345226e-05, "loss": 2.8499, "step": 5798 }, { "epoch": 0.4755316423264156, "grad_norm": 0.4592927396297455, "learning_rate": 5.815828816184793e-05, "loss": 2.792, "step": 5800 }, { "epoch": 0.475695618754804, "grad_norm": 0.4712817072868347, "learning_rate": 5.813153723442518e-05, "loss": 2.792, "step": 5802 }, { "epoch": 0.47585959518319243, "grad_norm": 0.4331757724285126, "learning_rate": 5.810478391594303e-05, "loss": 2.7926, "step": 5804 }, { "epoch": 0.4760235716115808, "grad_norm": 0.4528201222419739, "learning_rate": 5.80780282142682e-05, "loss": 2.7966, "step": 5806 }, { "epoch": 0.47618754803996927, "grad_norm": 0.46794208884239197, "learning_rate": 5.805127013726816e-05, "loss": 2.8304, "step": 5808 }, { "epoch": 0.47635152446835766, "grad_norm": 0.4737168252468109, "learning_rate": 5.802450969281109e-05, "loss": 2.8088, "step": 5810 }, { "epoch": 0.4765155008967461, "grad_norm": 0.4532864987850189, "learning_rate": 5.799774688876578e-05, "loss": 2.8326, "step": 5812 }, { "epoch": 0.4766794773251345, "grad_norm": 0.38387665152549744, "learning_rate": 5.7970981733001813e-05, "loss": 2.7919, "step": 5814 }, { "epoch": 0.47684345375352294, "grad_norm": 0.3972019851207733, "learning_rate": 5.7944214233389397e-05, "loss": 2.8352, "step": 5816 }, { "epoch": 0.47700743018191133, "grad_norm": 0.3993661105632782, "learning_rate": 5.791744439779946e-05, "loss": 2.8333, "step": 5818 }, { "epoch": 0.4771714066102998, "grad_norm": 0.42471030354499817, "learning_rate": 5.7890672234103604e-05, "loss": 2.8133, "step": 5820 }, { "epoch": 0.47733538303868817, "grad_norm": 0.46742257475852966, "learning_rate": 5.786389775017413e-05, "loss": 2.8554, "step": 5822 }, { "epoch": 0.4774993594670766, "grad_norm": 0.38363349437713623, "learning_rate": 5.7837120953884e-05, "loss": 2.8268, "step": 5824 }, { "epoch": 0.477663335895465, "grad_norm": 0.4112091660499573, "learning_rate": 5.7810341853106875e-05, "loss": 2.832, "step": 5826 }, { "epoch": 0.47782731232385345, "grad_norm": 0.3802869915962219, "learning_rate": 5.77835604557171e-05, "loss": 2.8291, "step": 5828 }, { "epoch": 0.47799128875224184, "grad_norm": 0.38989999890327454, "learning_rate": 5.775677676958965e-05, "loss": 2.8092, "step": 5830 }, { "epoch": 0.4781552651806303, "grad_norm": 0.3859577476978302, "learning_rate": 5.772999080260023e-05, "loss": 2.7836, "step": 5832 }, { "epoch": 0.47831924160901873, "grad_norm": 0.4060695767402649, "learning_rate": 5.7703202562625195e-05, "loss": 2.7615, "step": 5834 }, { "epoch": 0.4784832180374071, "grad_norm": 0.41608673334121704, "learning_rate": 5.767641205754153e-05, "loss": 2.8321, "step": 5836 }, { "epoch": 0.47864719446579557, "grad_norm": 0.4441404938697815, "learning_rate": 5.764961929522697e-05, "loss": 2.7576, "step": 5838 }, { "epoch": 0.47881117089418396, "grad_norm": 0.45295077562332153, "learning_rate": 5.762282428355983e-05, "loss": 2.8065, "step": 5840 }, { "epoch": 0.4789751473225724, "grad_norm": 0.4226892292499542, "learning_rate": 5.759602703041911e-05, "loss": 2.8324, "step": 5842 }, { "epoch": 0.4791391237509608, "grad_norm": 0.39947935938835144, "learning_rate": 5.75692275436845e-05, "loss": 2.8515, "step": 5844 }, { "epoch": 0.47930310017934924, "grad_norm": 0.4010763466358185, "learning_rate": 5.754242583123633e-05, "loss": 2.8421, "step": 5846 }, { "epoch": 0.47946707660773763, "grad_norm": 0.42324477434158325, "learning_rate": 5.751562190095557e-05, "loss": 2.8077, "step": 5848 }, { "epoch": 0.4796310530361261, "grad_norm": 0.4278055429458618, "learning_rate": 5.748881576072386e-05, "loss": 2.7678, "step": 5850 }, { "epoch": 0.47979502946451447, "grad_norm": 0.46750038862228394, "learning_rate": 5.746200741842348e-05, "loss": 2.7893, "step": 5852 }, { "epoch": 0.4799590058929029, "grad_norm": 0.40809497237205505, "learning_rate": 5.743519688193735e-05, "loss": 2.7416, "step": 5854 }, { "epoch": 0.4801229823212913, "grad_norm": 0.4718024730682373, "learning_rate": 5.7408384159149055e-05, "loss": 2.796, "step": 5856 }, { "epoch": 0.48028695874967975, "grad_norm": 0.42716535925865173, "learning_rate": 5.738156925794281e-05, "loss": 2.8112, "step": 5858 }, { "epoch": 0.48045093517806814, "grad_norm": 0.4023424983024597, "learning_rate": 5.735475218620349e-05, "loss": 2.7993, "step": 5860 }, { "epoch": 0.4806149116064566, "grad_norm": 0.39570191502571106, "learning_rate": 5.7327932951816565e-05, "loss": 2.8071, "step": 5862 }, { "epoch": 0.480778888034845, "grad_norm": 0.4263247549533844, "learning_rate": 5.730111156266819e-05, "loss": 2.7752, "step": 5864 }, { "epoch": 0.4809428644632334, "grad_norm": 0.4379764795303345, "learning_rate": 5.7274288026645104e-05, "loss": 2.7412, "step": 5866 }, { "epoch": 0.4811068408916218, "grad_norm": 0.4216366112232208, "learning_rate": 5.724746235163473e-05, "loss": 2.7588, "step": 5868 }, { "epoch": 0.48127081732001026, "grad_norm": 0.4250224232673645, "learning_rate": 5.722063454552509e-05, "loss": 2.8584, "step": 5870 }, { "epoch": 0.48143479374839865, "grad_norm": 0.44280561804771423, "learning_rate": 5.7193804616204826e-05, "loss": 2.8243, "step": 5872 }, { "epoch": 0.4815987701767871, "grad_norm": 0.44614461064338684, "learning_rate": 5.716697257156322e-05, "loss": 2.7236, "step": 5874 }, { "epoch": 0.4817627466051755, "grad_norm": 0.44351133704185486, "learning_rate": 5.71401384194902e-05, "loss": 2.7618, "step": 5876 }, { "epoch": 0.48192672303356393, "grad_norm": 0.450923353433609, "learning_rate": 5.711330216787624e-05, "loss": 2.7848, "step": 5878 }, { "epoch": 0.4820906994619523, "grad_norm": 0.45195823907852173, "learning_rate": 5.70864638246125e-05, "loss": 2.8052, "step": 5880 }, { "epoch": 0.48225467589034077, "grad_norm": 0.4296433627605438, "learning_rate": 5.7059623397590734e-05, "loss": 2.7922, "step": 5882 }, { "epoch": 0.48241865231872916, "grad_norm": 0.42876946926116943, "learning_rate": 5.703278089470331e-05, "loss": 2.8272, "step": 5884 }, { "epoch": 0.4825826287471176, "grad_norm": 0.42906731367111206, "learning_rate": 5.700593632384318e-05, "loss": 2.8105, "step": 5886 }, { "epoch": 0.482746605175506, "grad_norm": 0.4271446466445923, "learning_rate": 5.6979089692903954e-05, "loss": 2.8201, "step": 5888 }, { "epoch": 0.48291058160389444, "grad_norm": 0.43975505232810974, "learning_rate": 5.6952241009779794e-05, "loss": 2.8063, "step": 5890 }, { "epoch": 0.4830745580322829, "grad_norm": 0.4173251688480377, "learning_rate": 5.692539028236551e-05, "loss": 2.8241, "step": 5892 }, { "epoch": 0.4832385344606713, "grad_norm": 0.43732768297195435, "learning_rate": 5.68985375185565e-05, "loss": 2.798, "step": 5894 }, { "epoch": 0.4834025108890597, "grad_norm": 0.42669641971588135, "learning_rate": 5.687168272624874e-05, "loss": 2.8024, "step": 5896 }, { "epoch": 0.4835664873174481, "grad_norm": 0.4393509328365326, "learning_rate": 5.6844825913338825e-05, "loss": 2.8367, "step": 5898 }, { "epoch": 0.48373046374583656, "grad_norm": 0.417496919631958, "learning_rate": 5.681796708772396e-05, "loss": 2.8294, "step": 5900 }, { "epoch": 0.48389444017422495, "grad_norm": 0.4174562990665436, "learning_rate": 5.6791106257301866e-05, "loss": 2.7937, "step": 5902 }, { "epoch": 0.4840584166026134, "grad_norm": 0.4357423782348633, "learning_rate": 5.676424342997095e-05, "loss": 2.794, "step": 5904 }, { "epoch": 0.4842223930310018, "grad_norm": 0.4304860234260559, "learning_rate": 5.673737861363016e-05, "loss": 2.8276, "step": 5906 }, { "epoch": 0.48438636945939023, "grad_norm": 0.4502744674682617, "learning_rate": 5.6710511816179036e-05, "loss": 2.7533, "step": 5908 }, { "epoch": 0.4845503458877786, "grad_norm": 0.44293758273124695, "learning_rate": 5.6683643045517665e-05, "loss": 2.8093, "step": 5910 }, { "epoch": 0.48471432231616707, "grad_norm": 0.4096347987651825, "learning_rate": 5.665677230954678e-05, "loss": 2.8277, "step": 5912 }, { "epoch": 0.48487829874455546, "grad_norm": 0.41238588094711304, "learning_rate": 5.6629899616167635e-05, "loss": 2.766, "step": 5914 }, { "epoch": 0.4850422751729439, "grad_norm": 0.4263080656528473, "learning_rate": 5.6603024973282096e-05, "loss": 2.8173, "step": 5916 }, { "epoch": 0.4852062516013323, "grad_norm": 0.39349424839019775, "learning_rate": 5.6576148388792575e-05, "loss": 2.8281, "step": 5918 }, { "epoch": 0.48537022802972074, "grad_norm": 0.4356912076473236, "learning_rate": 5.654926987060209e-05, "loss": 2.7676, "step": 5920 }, { "epoch": 0.48553420445810913, "grad_norm": 0.4062795341014862, "learning_rate": 5.6522389426614184e-05, "loss": 2.7863, "step": 5922 }, { "epoch": 0.4856981808864976, "grad_norm": 0.4009894132614136, "learning_rate": 5.649550706473301e-05, "loss": 2.7931, "step": 5924 }, { "epoch": 0.48586215731488597, "grad_norm": 0.4238545000553131, "learning_rate": 5.646862279286325e-05, "loss": 2.7869, "step": 5926 }, { "epoch": 0.4860261337432744, "grad_norm": 0.4274156391620636, "learning_rate": 5.6441736618910146e-05, "loss": 2.8036, "step": 5928 }, { "epoch": 0.4861901101716628, "grad_norm": 0.4315156042575836, "learning_rate": 5.6414848550779554e-05, "loss": 2.7888, "step": 5930 }, { "epoch": 0.48635408660005125, "grad_norm": 0.4721137285232544, "learning_rate": 5.638795859637782e-05, "loss": 2.7765, "step": 5932 }, { "epoch": 0.48651806302843964, "grad_norm": 0.4357931315898895, "learning_rate": 5.6361066763611856e-05, "loss": 2.8073, "step": 5934 }, { "epoch": 0.4866820394568281, "grad_norm": 0.4593020975589752, "learning_rate": 5.633417306038917e-05, "loss": 2.8103, "step": 5936 }, { "epoch": 0.4868460158852165, "grad_norm": 0.48641642928123474, "learning_rate": 5.630727749461781e-05, "loss": 2.7753, "step": 5938 }, { "epoch": 0.4870099923136049, "grad_norm": 0.4623355567455292, "learning_rate": 5.6280380074206296e-05, "loss": 2.7595, "step": 5940 }, { "epoch": 0.4871739687419933, "grad_norm": 0.4284757375717163, "learning_rate": 5.6253480807063785e-05, "loss": 2.7887, "step": 5942 }, { "epoch": 0.48733794517038176, "grad_norm": 0.4367896318435669, "learning_rate": 5.6226579701099955e-05, "loss": 2.7582, "step": 5944 }, { "epoch": 0.48750192159877015, "grad_norm": 0.42119714617729187, "learning_rate": 5.6199676764225e-05, "loss": 2.7911, "step": 5946 }, { "epoch": 0.4876658980271586, "grad_norm": 0.4299163222312927, "learning_rate": 5.617277200434965e-05, "loss": 2.8189, "step": 5948 }, { "epoch": 0.48782987445554704, "grad_norm": 0.44932517409324646, "learning_rate": 5.614586542938521e-05, "loss": 2.7833, "step": 5950 }, { "epoch": 0.48799385088393543, "grad_norm": 0.43705087900161743, "learning_rate": 5.6118957047243505e-05, "loss": 2.7381, "step": 5952 }, { "epoch": 0.4881578273123239, "grad_norm": 0.44094035029411316, "learning_rate": 5.609204686583683e-05, "loss": 2.7726, "step": 5954 }, { "epoch": 0.48832180374071227, "grad_norm": 0.45215317606925964, "learning_rate": 5.606513489307812e-05, "loss": 2.7216, "step": 5956 }, { "epoch": 0.4884857801691007, "grad_norm": 0.4417240619659424, "learning_rate": 5.603822113688074e-05, "loss": 2.79, "step": 5958 }, { "epoch": 0.4886497565974891, "grad_norm": 0.4115074872970581, "learning_rate": 5.6011305605158614e-05, "loss": 2.7628, "step": 5960 }, { "epoch": 0.48881373302587755, "grad_norm": 0.3991395831108093, "learning_rate": 5.598438830582623e-05, "loss": 2.7127, "step": 5962 }, { "epoch": 0.48897770945426594, "grad_norm": 0.42140302062034607, "learning_rate": 5.595746924679851e-05, "loss": 2.8474, "step": 5964 }, { "epoch": 0.4891416858826544, "grad_norm": 0.41280636191368103, "learning_rate": 5.593054843599097e-05, "loss": 2.7677, "step": 5966 }, { "epoch": 0.4893056623110428, "grad_norm": 0.40295860171318054, "learning_rate": 5.590362588131961e-05, "loss": 2.835, "step": 5968 }, { "epoch": 0.4894696387394312, "grad_norm": 0.41554921865463257, "learning_rate": 5.587670159070093e-05, "loss": 2.7716, "step": 5970 }, { "epoch": 0.4896336151678196, "grad_norm": 0.4251762926578522, "learning_rate": 5.5849775572051955e-05, "loss": 2.7546, "step": 5972 }, { "epoch": 0.48979759159620806, "grad_norm": 0.44020459055900574, "learning_rate": 5.582284783329022e-05, "loss": 2.8029, "step": 5974 }, { "epoch": 0.48996156802459645, "grad_norm": 0.45208802819252014, "learning_rate": 5.579591838233379e-05, "loss": 2.8717, "step": 5976 }, { "epoch": 0.4901255444529849, "grad_norm": 0.46185302734375, "learning_rate": 5.5768987227101167e-05, "loss": 2.8632, "step": 5978 }, { "epoch": 0.4902895208813733, "grad_norm": 0.4524150788784027, "learning_rate": 5.574205437551141e-05, "loss": 2.8004, "step": 5980 }, { "epoch": 0.49045349730976173, "grad_norm": 0.5525970458984375, "learning_rate": 5.5715119835484056e-05, "loss": 2.743, "step": 5982 }, { "epoch": 0.4906174737381501, "grad_norm": 0.5090713500976562, "learning_rate": 5.568818361493915e-05, "loss": 2.7977, "step": 5984 }, { "epoch": 0.49078145016653857, "grad_norm": 0.43728598952293396, "learning_rate": 5.566124572179723e-05, "loss": 2.7899, "step": 5986 }, { "epoch": 0.49094542659492696, "grad_norm": 0.4495003819465637, "learning_rate": 5.56343061639793e-05, "loss": 2.7825, "step": 5988 }, { "epoch": 0.4911094030233154, "grad_norm": 0.4431800842285156, "learning_rate": 5.5607364949406895e-05, "loss": 2.7782, "step": 5990 }, { "epoch": 0.4912733794517038, "grad_norm": 0.44171142578125, "learning_rate": 5.558042208600201e-05, "loss": 2.8494, "step": 5992 }, { "epoch": 0.49143735588009224, "grad_norm": 0.43040671944618225, "learning_rate": 5.5553477581687117e-05, "loss": 2.8051, "step": 5994 }, { "epoch": 0.49160133230848063, "grad_norm": 0.4206514358520508, "learning_rate": 5.552653144438521e-05, "loss": 2.7955, "step": 5996 }, { "epoch": 0.4917653087368691, "grad_norm": 0.4470977783203125, "learning_rate": 5.549958368201971e-05, "loss": 2.7861, "step": 5998 }, { "epoch": 0.49192928516525747, "grad_norm": 0.4228839874267578, "learning_rate": 5.547263430251456e-05, "loss": 2.8133, "step": 6000 }, { "epoch": 0.4920932615936459, "grad_norm": 0.4443196654319763, "learning_rate": 5.544568331379415e-05, "loss": 2.8121, "step": 6002 }, { "epoch": 0.49225723802203436, "grad_norm": 0.408275842666626, "learning_rate": 5.541873072378337e-05, "loss": 2.7748, "step": 6004 }, { "epoch": 0.49242121445042275, "grad_norm": 0.3874877989292145, "learning_rate": 5.539177654040755e-05, "loss": 2.8108, "step": 6006 }, { "epoch": 0.4925851908788112, "grad_norm": 0.39103594422340393, "learning_rate": 5.536482077159251e-05, "loss": 2.8343, "step": 6008 }, { "epoch": 0.4927491673071996, "grad_norm": 0.39628902077674866, "learning_rate": 5.533786342526455e-05, "loss": 2.7583, "step": 6010 }, { "epoch": 0.49291314373558803, "grad_norm": 0.44031769037246704, "learning_rate": 5.531090450935037e-05, "loss": 2.7796, "step": 6012 }, { "epoch": 0.4930771201639764, "grad_norm": 0.4099719524383545, "learning_rate": 5.5283944031777215e-05, "loss": 2.7807, "step": 6014 }, { "epoch": 0.49324109659236487, "grad_norm": 0.4040038585662842, "learning_rate": 5.5256982000472746e-05, "loss": 2.794, "step": 6016 }, { "epoch": 0.49340507302075326, "grad_norm": 0.39991289377212524, "learning_rate": 5.523001842336507e-05, "loss": 2.7993, "step": 6018 }, { "epoch": 0.4935690494491417, "grad_norm": 0.39744338393211365, "learning_rate": 5.5203053308382766e-05, "loss": 2.7828, "step": 6020 }, { "epoch": 0.4937330258775301, "grad_norm": 0.39443936944007874, "learning_rate": 5.5176086663454884e-05, "loss": 2.7654, "step": 6022 }, { "epoch": 0.49389700230591854, "grad_norm": 0.4279281198978424, "learning_rate": 5.5149118496510865e-05, "loss": 2.79, "step": 6024 }, { "epoch": 0.49406097873430693, "grad_norm": 0.44886335730552673, "learning_rate": 5.5122148815480655e-05, "loss": 2.7787, "step": 6026 }, { "epoch": 0.4942249551626954, "grad_norm": 0.43961870670318604, "learning_rate": 5.5095177628294635e-05, "loss": 2.8273, "step": 6028 }, { "epoch": 0.49438893159108377, "grad_norm": 0.44873446226119995, "learning_rate": 5.506820494288361e-05, "loss": 2.8053, "step": 6030 }, { "epoch": 0.4945529080194722, "grad_norm": 0.49015265703201294, "learning_rate": 5.504123076717882e-05, "loss": 2.8459, "step": 6032 }, { "epoch": 0.4947168844478606, "grad_norm": 0.47962191700935364, "learning_rate": 5.501425510911199e-05, "loss": 2.7945, "step": 6034 }, { "epoch": 0.49488086087624905, "grad_norm": 0.49250590801239014, "learning_rate": 5.4987277976615224e-05, "loss": 2.7716, "step": 6036 }, { "epoch": 0.49504483730463744, "grad_norm": 0.4474705755710602, "learning_rate": 5.4960299377621085e-05, "loss": 2.7868, "step": 6038 }, { "epoch": 0.4952088137330259, "grad_norm": 0.4595922529697418, "learning_rate": 5.493331932006259e-05, "loss": 2.8321, "step": 6040 }, { "epoch": 0.4953727901614143, "grad_norm": 0.41899386048316956, "learning_rate": 5.490633781187313e-05, "loss": 2.7934, "step": 6042 }, { "epoch": 0.4955367665898027, "grad_norm": 0.40570735931396484, "learning_rate": 5.487935486098658e-05, "loss": 2.7884, "step": 6044 }, { "epoch": 0.4957007430181911, "grad_norm": 0.43426474928855896, "learning_rate": 5.48523704753372e-05, "loss": 2.8367, "step": 6046 }, { "epoch": 0.49586471944657956, "grad_norm": 0.4359624683856964, "learning_rate": 5.482538466285968e-05, "loss": 2.7899, "step": 6048 }, { "epoch": 0.49602869587496795, "grad_norm": 0.41395142674446106, "learning_rate": 5.4798397431489146e-05, "loss": 2.7557, "step": 6050 }, { "epoch": 0.4961926723033564, "grad_norm": 0.44301837682724, "learning_rate": 5.4771408789161126e-05, "loss": 2.8514, "step": 6052 }, { "epoch": 0.4963566487317448, "grad_norm": 0.4121994078159332, "learning_rate": 5.474441874381157e-05, "loss": 2.7861, "step": 6054 }, { "epoch": 0.49652062516013323, "grad_norm": 0.4379679262638092, "learning_rate": 5.471742730337682e-05, "loss": 2.7897, "step": 6056 }, { "epoch": 0.4966846015885216, "grad_norm": 0.43107476830482483, "learning_rate": 5.4690434475793674e-05, "loss": 2.7743, "step": 6058 }, { "epoch": 0.49684857801691007, "grad_norm": 0.4141952693462372, "learning_rate": 5.4663440268999285e-05, "loss": 2.7734, "step": 6060 }, { "epoch": 0.4970125544452985, "grad_norm": 0.4251377284526825, "learning_rate": 5.463644469093125e-05, "loss": 2.8095, "step": 6062 }, { "epoch": 0.4971765308736869, "grad_norm": 0.46977972984313965, "learning_rate": 5.4609447749527564e-05, "loss": 2.7804, "step": 6064 }, { "epoch": 0.49734050730207535, "grad_norm": 0.44376277923583984, "learning_rate": 5.458244945272659e-05, "loss": 2.7842, "step": 6066 }, { "epoch": 0.49750448373046374, "grad_norm": 0.4628044664859772, "learning_rate": 5.455544980846714e-05, "loss": 2.7729, "step": 6068 }, { "epoch": 0.4976684601588522, "grad_norm": 0.47324278950691223, "learning_rate": 5.452844882468837e-05, "loss": 2.7729, "step": 6070 }, { "epoch": 0.4978324365872406, "grad_norm": 0.4188961088657379, "learning_rate": 5.450144650932987e-05, "loss": 2.7722, "step": 6072 }, { "epoch": 0.497996413015629, "grad_norm": 0.4392087161540985, "learning_rate": 5.447444287033159e-05, "loss": 2.8274, "step": 6074 }, { "epoch": 0.4981603894440174, "grad_norm": 0.4453078508377075, "learning_rate": 5.4447437915633926e-05, "loss": 2.779, "step": 6076 }, { "epoch": 0.49832436587240586, "grad_norm": 0.4676656126976013, "learning_rate": 5.4420431653177575e-05, "loss": 2.775, "step": 6078 }, { "epoch": 0.49848834230079425, "grad_norm": 0.42664840817451477, "learning_rate": 5.439342409090369e-05, "loss": 2.7797, "step": 6080 }, { "epoch": 0.4986523187291827, "grad_norm": 0.43920576572418213, "learning_rate": 5.436641523675379e-05, "loss": 2.7808, "step": 6082 }, { "epoch": 0.4988162951575711, "grad_norm": 0.4442078769207001, "learning_rate": 5.433940509866975e-05, "loss": 2.8434, "step": 6084 }, { "epoch": 0.49898027158595953, "grad_norm": 0.41238483786582947, "learning_rate": 5.4312393684593833e-05, "loss": 2.8077, "step": 6086 }, { "epoch": 0.4991442480143479, "grad_norm": 0.4398910105228424, "learning_rate": 5.4285381002468696e-05, "loss": 2.7683, "step": 6088 }, { "epoch": 0.49930822444273637, "grad_norm": 0.46023282408714294, "learning_rate": 5.425836706023735e-05, "loss": 2.8109, "step": 6090 }, { "epoch": 0.49947220087112476, "grad_norm": 0.44774797558784485, "learning_rate": 5.4231351865843174e-05, "loss": 2.7536, "step": 6092 }, { "epoch": 0.4996361772995132, "grad_norm": 0.4440127909183502, "learning_rate": 5.4204335427229935e-05, "loss": 2.831, "step": 6094 }, { "epoch": 0.4998001537279016, "grad_norm": 0.39880669116973877, "learning_rate": 5.4177317752341725e-05, "loss": 2.8042, "step": 6096 }, { "epoch": 0.49996413015629004, "grad_norm": 0.38815608620643616, "learning_rate": 5.415029884912306e-05, "loss": 2.7251, "step": 6098 }, { "epoch": 0.5001281065846784, "grad_norm": 0.40447503328323364, "learning_rate": 5.412327872551879e-05, "loss": 2.7954, "step": 6100 }, { "epoch": 0.5002920830130668, "grad_norm": 0.39449626207351685, "learning_rate": 5.409625738947408e-05, "loss": 2.7724, "step": 6102 }, { "epoch": 0.5004560594414553, "grad_norm": 0.4029788374900818, "learning_rate": 5.406923484893452e-05, "loss": 2.8484, "step": 6104 }, { "epoch": 0.5006200358698437, "grad_norm": 0.39150452613830566, "learning_rate": 5.404221111184603e-05, "loss": 2.8264, "step": 6106 }, { "epoch": 0.5007840122982321, "grad_norm": 0.40811586380004883, "learning_rate": 5.4015186186154856e-05, "loss": 2.7428, "step": 6108 }, { "epoch": 0.5009479887266205, "grad_norm": 0.46742165088653564, "learning_rate": 5.39881600798076e-05, "loss": 2.8109, "step": 6110 }, { "epoch": 0.501111965155009, "grad_norm": 0.45936119556427, "learning_rate": 5.396113280075128e-05, "loss": 2.7595, "step": 6112 }, { "epoch": 0.5012759415833974, "grad_norm": 0.4600870609283447, "learning_rate": 5.3934104356933165e-05, "loss": 2.8304, "step": 6114 }, { "epoch": 0.5014399180117858, "grad_norm": 0.4206525683403015, "learning_rate": 5.39070747563009e-05, "loss": 2.8082, "step": 6116 }, { "epoch": 0.5016038944401742, "grad_norm": 0.4527590274810791, "learning_rate": 5.38800440068025e-05, "loss": 2.8014, "step": 6118 }, { "epoch": 0.5017678708685627, "grad_norm": 0.43309837579727173, "learning_rate": 5.385301211638626e-05, "loss": 2.8216, "step": 6120 }, { "epoch": 0.5019318472969511, "grad_norm": 0.44548389315605164, "learning_rate": 5.382597909300087e-05, "loss": 2.7575, "step": 6122 }, { "epoch": 0.5020958237253395, "grad_norm": 0.4065755605697632, "learning_rate": 5.379894494459533e-05, "loss": 2.7667, "step": 6124 }, { "epoch": 0.502259800153728, "grad_norm": 0.39666667580604553, "learning_rate": 5.377190967911895e-05, "loss": 2.792, "step": 6126 }, { "epoch": 0.5024237765821163, "grad_norm": 0.41114503145217896, "learning_rate": 5.374487330452139e-05, "loss": 2.7447, "step": 6128 }, { "epoch": 0.5025877530105047, "grad_norm": 0.42686963081359863, "learning_rate": 5.3717835828752646e-05, "loss": 2.7218, "step": 6130 }, { "epoch": 0.5027517294388931, "grad_norm": 0.4103451073169708, "learning_rate": 5.369079725976301e-05, "loss": 2.7768, "step": 6132 }, { "epoch": 0.5029157058672816, "grad_norm": 0.39283350110054016, "learning_rate": 5.366375760550313e-05, "loss": 2.7872, "step": 6134 }, { "epoch": 0.50307968229567, "grad_norm": 0.38679662346839905, "learning_rate": 5.363671687392393e-05, "loss": 2.7488, "step": 6136 }, { "epoch": 0.5032436587240584, "grad_norm": 0.4073856770992279, "learning_rate": 5.360967507297669e-05, "loss": 2.7554, "step": 6138 }, { "epoch": 0.5034076351524468, "grad_norm": 0.41266268491744995, "learning_rate": 5.358263221061296e-05, "loss": 2.7786, "step": 6140 }, { "epoch": 0.5035716115808353, "grad_norm": 0.3967779278755188, "learning_rate": 5.3555588294784664e-05, "loss": 2.7829, "step": 6142 }, { "epoch": 0.5037355880092237, "grad_norm": 0.41831472516059875, "learning_rate": 5.352854333344397e-05, "loss": 2.7711, "step": 6144 }, { "epoch": 0.5038995644376121, "grad_norm": 0.43159353733062744, "learning_rate": 5.3501497334543416e-05, "loss": 2.7562, "step": 6146 }, { "epoch": 0.5040635408660005, "grad_norm": 0.41569891571998596, "learning_rate": 5.34744503060358e-05, "loss": 2.7809, "step": 6148 }, { "epoch": 0.504227517294389, "grad_norm": 0.38171082735061646, "learning_rate": 5.344740225587423e-05, "loss": 2.7576, "step": 6150 }, { "epoch": 0.5043914937227774, "grad_norm": 0.4033462405204773, "learning_rate": 5.342035319201213e-05, "loss": 2.765, "step": 6152 }, { "epoch": 0.5045554701511658, "grad_norm": 0.4013696610927582, "learning_rate": 5.3393303122403224e-05, "loss": 2.7577, "step": 6154 }, { "epoch": 0.5047194465795541, "grad_norm": 0.40175867080688477, "learning_rate": 5.336625205500151e-05, "loss": 2.8194, "step": 6156 }, { "epoch": 0.5048834230079426, "grad_norm": 0.442961722612381, "learning_rate": 5.333919999776129e-05, "loss": 2.8206, "step": 6158 }, { "epoch": 0.505047399436331, "grad_norm": 0.4085559546947479, "learning_rate": 5.3312146958637175e-05, "loss": 2.7768, "step": 6160 }, { "epoch": 0.5052113758647194, "grad_norm": 0.43066325783729553, "learning_rate": 5.3285092945584005e-05, "loss": 2.798, "step": 6162 }, { "epoch": 0.5053753522931078, "grad_norm": 0.4265105724334717, "learning_rate": 5.3258037966556986e-05, "loss": 2.7422, "step": 6164 }, { "epoch": 0.5055393287214963, "grad_norm": 0.4203242361545563, "learning_rate": 5.3230982029511566e-05, "loss": 2.7914, "step": 6166 }, { "epoch": 0.5057033051498847, "grad_norm": 0.41456693410873413, "learning_rate": 5.3203925142403466e-05, "loss": 2.827, "step": 6168 }, { "epoch": 0.5058672815782731, "grad_norm": 0.4037761986255646, "learning_rate": 5.31768673131887e-05, "loss": 2.8208, "step": 6170 }, { "epoch": 0.5060312580066615, "grad_norm": 0.3898729383945465, "learning_rate": 5.31498085498236e-05, "loss": 2.7565, "step": 6172 }, { "epoch": 0.50619523443505, "grad_norm": 0.4107765257358551, "learning_rate": 5.312274886026467e-05, "loss": 2.8201, "step": 6174 }, { "epoch": 0.5063592108634384, "grad_norm": 0.4226357340812683, "learning_rate": 5.309568825246879e-05, "loss": 2.7327, "step": 6176 }, { "epoch": 0.5065231872918268, "grad_norm": 0.42700108885765076, "learning_rate": 5.3068626734393056e-05, "loss": 2.8095, "step": 6178 }, { "epoch": 0.5066871637202153, "grad_norm": 0.4003014862537384, "learning_rate": 5.304156431399484e-05, "loss": 2.7627, "step": 6180 }, { "epoch": 0.5068511401486037, "grad_norm": 0.41454121470451355, "learning_rate": 5.30145009992318e-05, "loss": 2.7588, "step": 6182 }, { "epoch": 0.507015116576992, "grad_norm": 0.4294338822364807, "learning_rate": 5.298743679806182e-05, "loss": 2.8088, "step": 6184 }, { "epoch": 0.5071790930053804, "grad_norm": 0.42749956250190735, "learning_rate": 5.2960371718443055e-05, "loss": 2.8191, "step": 6186 }, { "epoch": 0.507343069433769, "grad_norm": 0.43081730604171753, "learning_rate": 5.2933305768333955e-05, "loss": 2.7848, "step": 6188 }, { "epoch": 0.5075070458621573, "grad_norm": 0.4279966652393341, "learning_rate": 5.29062389556932e-05, "loss": 2.7276, "step": 6190 }, { "epoch": 0.5076710222905457, "grad_norm": 0.4057946503162384, "learning_rate": 5.287917128847969e-05, "loss": 2.7764, "step": 6192 }, { "epoch": 0.5078349987189341, "grad_norm": 0.41668733954429626, "learning_rate": 5.2852102774652634e-05, "loss": 2.7548, "step": 6194 }, { "epoch": 0.5079989751473226, "grad_norm": 0.4230756461620331, "learning_rate": 5.282503342217148e-05, "loss": 2.7572, "step": 6196 }, { "epoch": 0.508162951575711, "grad_norm": 0.42891067266464233, "learning_rate": 5.279796323899587e-05, "loss": 2.7678, "step": 6198 }, { "epoch": 0.5083269280040994, "grad_norm": 0.4005391299724579, "learning_rate": 5.277089223308576e-05, "loss": 2.792, "step": 6200 }, { "epoch": 0.5084909044324878, "grad_norm": 0.4525899291038513, "learning_rate": 5.274382041240129e-05, "loss": 2.8306, "step": 6202 }, { "epoch": 0.5086548808608763, "grad_norm": 0.4422246515750885, "learning_rate": 5.2716747784902875e-05, "loss": 2.7601, "step": 6204 }, { "epoch": 0.5088188572892647, "grad_norm": 0.4195386469364166, "learning_rate": 5.2689674358551175e-05, "loss": 2.7958, "step": 6206 }, { "epoch": 0.5089828337176531, "grad_norm": 0.4343207776546478, "learning_rate": 5.266260014130705e-05, "loss": 2.8017, "step": 6208 }, { "epoch": 0.5091468101460415, "grad_norm": 0.40766385197639465, "learning_rate": 5.26355251411316e-05, "loss": 2.7443, "step": 6210 }, { "epoch": 0.50931078657443, "grad_norm": 0.38682085275650024, "learning_rate": 5.2608449365986175e-05, "loss": 2.7368, "step": 6212 }, { "epoch": 0.5094747630028184, "grad_norm": 0.3705482482910156, "learning_rate": 5.2581372823832364e-05, "loss": 2.7829, "step": 6214 }, { "epoch": 0.5096387394312067, "grad_norm": 0.3838585317134857, "learning_rate": 5.255429552263194e-05, "loss": 2.8119, "step": 6216 }, { "epoch": 0.5098027158595951, "grad_norm": 0.4112929403781891, "learning_rate": 5.25272174703469e-05, "loss": 2.8034, "step": 6218 }, { "epoch": 0.5099666922879836, "grad_norm": 0.43207964301109314, "learning_rate": 5.250013867493953e-05, "loss": 2.7391, "step": 6220 }, { "epoch": 0.510130668716372, "grad_norm": 0.4452822804450989, "learning_rate": 5.2473059144372224e-05, "loss": 2.7944, "step": 6222 }, { "epoch": 0.5102946451447604, "grad_norm": 0.4192415177822113, "learning_rate": 5.2445978886607714e-05, "loss": 2.7224, "step": 6224 }, { "epoch": 0.5104586215731488, "grad_norm": 0.4334215819835663, "learning_rate": 5.241889790960887e-05, "loss": 2.7728, "step": 6226 }, { "epoch": 0.5106225980015373, "grad_norm": 0.44213369488716125, "learning_rate": 5.239181622133876e-05, "loss": 2.8489, "step": 6228 }, { "epoch": 0.5107865744299257, "grad_norm": 0.43869420886039734, "learning_rate": 5.2364733829760726e-05, "loss": 2.7913, "step": 6230 }, { "epoch": 0.5109505508583141, "grad_norm": 0.41994988918304443, "learning_rate": 5.2337650742838274e-05, "loss": 2.7579, "step": 6232 }, { "epoch": 0.5111145272867026, "grad_norm": 0.4251670837402344, "learning_rate": 5.231056696853509e-05, "loss": 2.7373, "step": 6234 }, { "epoch": 0.511278503715091, "grad_norm": 0.42417407035827637, "learning_rate": 5.228348251481514e-05, "loss": 2.8169, "step": 6236 }, { "epoch": 0.5114424801434794, "grad_norm": 0.4605846703052521, "learning_rate": 5.2256397389642517e-05, "loss": 2.7304, "step": 6238 }, { "epoch": 0.5116064565718678, "grad_norm": 0.41390493512153625, "learning_rate": 5.2229311600981546e-05, "loss": 2.771, "step": 6240 }, { "epoch": 0.5117704330002563, "grad_norm": 0.4106113016605377, "learning_rate": 5.220222515679674e-05, "loss": 2.7874, "step": 6242 }, { "epoch": 0.5119344094286447, "grad_norm": 0.40672963857650757, "learning_rate": 5.2175138065052806e-05, "loss": 2.7679, "step": 6244 }, { "epoch": 0.512098385857033, "grad_norm": 0.4203855097293854, "learning_rate": 5.214805033371463e-05, "loss": 2.7466, "step": 6246 }, { "epoch": 0.5122623622854214, "grad_norm": 0.3548525869846344, "learning_rate": 5.21209619707473e-05, "loss": 2.8231, "step": 6248 }, { "epoch": 0.5124263387138099, "grad_norm": 0.37747371196746826, "learning_rate": 5.20938729841161e-05, "loss": 2.8141, "step": 6250 }, { "epoch": 0.5125903151421983, "grad_norm": 0.41691645979881287, "learning_rate": 5.206678338178647e-05, "loss": 2.783, "step": 6252 }, { "epoch": 0.5127542915705867, "grad_norm": 0.4288187623023987, "learning_rate": 5.2039693171724034e-05, "loss": 2.7246, "step": 6254 }, { "epoch": 0.5129182679989751, "grad_norm": 0.41232407093048096, "learning_rate": 5.2012602361894626e-05, "loss": 2.7771, "step": 6256 }, { "epoch": 0.5130822444273636, "grad_norm": 0.39724135398864746, "learning_rate": 5.1985510960264216e-05, "loss": 2.7625, "step": 6258 }, { "epoch": 0.513246220855752, "grad_norm": 0.40316760540008545, "learning_rate": 5.195841897479897e-05, "loss": 2.79, "step": 6260 }, { "epoch": 0.5134101972841404, "grad_norm": 0.3974754214286804, "learning_rate": 5.193132641346524e-05, "loss": 2.7598, "step": 6262 }, { "epoch": 0.5135741737125288, "grad_norm": 0.4112754464149475, "learning_rate": 5.190423328422951e-05, "loss": 2.7954, "step": 6264 }, { "epoch": 0.5137381501409173, "grad_norm": 0.43297746777534485, "learning_rate": 5.1877139595058465e-05, "loss": 2.7357, "step": 6266 }, { "epoch": 0.5139021265693057, "grad_norm": 0.39716073870658875, "learning_rate": 5.185004535391893e-05, "loss": 2.7394, "step": 6268 }, { "epoch": 0.5140661029976941, "grad_norm": 0.3820559084415436, "learning_rate": 5.1822950568777906e-05, "loss": 2.7911, "step": 6270 }, { "epoch": 0.5142300794260825, "grad_norm": 0.4085080623626709, "learning_rate": 5.1795855247602564e-05, "loss": 2.7983, "step": 6272 }, { "epoch": 0.514394055854471, "grad_norm": 0.3924587368965149, "learning_rate": 5.176875939836019e-05, "loss": 2.7302, "step": 6274 }, { "epoch": 0.5145580322828593, "grad_norm": 0.4014110565185547, "learning_rate": 5.174166302901828e-05, "loss": 2.7932, "step": 6276 }, { "epoch": 0.5147220087112477, "grad_norm": 0.4239272475242615, "learning_rate": 5.1714566147544454e-05, "loss": 2.8041, "step": 6278 }, { "epoch": 0.5148859851396361, "grad_norm": 0.3936333954334259, "learning_rate": 5.168746876190649e-05, "loss": 2.8143, "step": 6280 }, { "epoch": 0.5150499615680246, "grad_norm": 0.42053771018981934, "learning_rate": 5.166037088007228e-05, "loss": 2.7328, "step": 6282 }, { "epoch": 0.515213937996413, "grad_norm": 0.4015074074268341, "learning_rate": 5.1633272510009924e-05, "loss": 2.7999, "step": 6284 }, { "epoch": 0.5153779144248014, "grad_norm": 0.4088298976421356, "learning_rate": 5.160617365968764e-05, "loss": 2.7773, "step": 6286 }, { "epoch": 0.5155418908531898, "grad_norm": 0.3864319920539856, "learning_rate": 5.157907433707375e-05, "loss": 2.7068, "step": 6288 }, { "epoch": 0.5157058672815783, "grad_norm": 0.41067034006118774, "learning_rate": 5.155197455013676e-05, "loss": 2.7409, "step": 6290 }, { "epoch": 0.5158698437099667, "grad_norm": 0.39901524782180786, "learning_rate": 5.1524874306845316e-05, "loss": 2.8163, "step": 6292 }, { "epoch": 0.5160338201383551, "grad_norm": 0.4010452330112457, "learning_rate": 5.1497773615168154e-05, "loss": 2.7706, "step": 6294 }, { "epoch": 0.5161977965667436, "grad_norm": 0.39766988158226013, "learning_rate": 5.14706724830742e-05, "loss": 2.8286, "step": 6296 }, { "epoch": 0.516361772995132, "grad_norm": 0.4324948489665985, "learning_rate": 5.1443570918532456e-05, "loss": 2.7614, "step": 6298 }, { "epoch": 0.5165257494235204, "grad_norm": 0.42732223868370056, "learning_rate": 5.141646892951206e-05, "loss": 2.8096, "step": 6300 }, { "epoch": 0.5166897258519088, "grad_norm": 0.3943778872489929, "learning_rate": 5.1389366523982306e-05, "loss": 2.7662, "step": 6302 }, { "epoch": 0.5168537022802973, "grad_norm": 0.4113095700740814, "learning_rate": 5.1362263709912604e-05, "loss": 2.7288, "step": 6304 }, { "epoch": 0.5170176787086856, "grad_norm": 0.4113110899925232, "learning_rate": 5.133516049527245e-05, "loss": 2.7923, "step": 6306 }, { "epoch": 0.517181655137074, "grad_norm": 0.42895427346229553, "learning_rate": 5.130805688803149e-05, "loss": 2.7665, "step": 6308 }, { "epoch": 0.5173456315654624, "grad_norm": 0.39833179116249084, "learning_rate": 5.1280952896159484e-05, "loss": 2.7699, "step": 6310 }, { "epoch": 0.5175096079938509, "grad_norm": 0.4006580412387848, "learning_rate": 5.125384852762628e-05, "loss": 2.787, "step": 6312 }, { "epoch": 0.5176735844222393, "grad_norm": 0.40851083397865295, "learning_rate": 5.122674379040186e-05, "loss": 2.7652, "step": 6314 }, { "epoch": 0.5178375608506277, "grad_norm": 0.3744722604751587, "learning_rate": 5.119963869245634e-05, "loss": 2.7533, "step": 6316 }, { "epoch": 0.5180015372790161, "grad_norm": 0.41793954372406006, "learning_rate": 5.1172533241759854e-05, "loss": 2.8169, "step": 6318 }, { "epoch": 0.5181655137074046, "grad_norm": 0.4526681900024414, "learning_rate": 5.114542744628275e-05, "loss": 2.7893, "step": 6320 }, { "epoch": 0.518329490135793, "grad_norm": 0.41063380241394043, "learning_rate": 5.111832131399539e-05, "loss": 2.7651, "step": 6322 }, { "epoch": 0.5184934665641814, "grad_norm": 0.38161396980285645, "learning_rate": 5.109121485286826e-05, "loss": 2.7477, "step": 6324 }, { "epoch": 0.5186574429925698, "grad_norm": 0.3881427049636841, "learning_rate": 5.106410807087197e-05, "loss": 2.7705, "step": 6326 }, { "epoch": 0.5188214194209583, "grad_norm": 0.39390257000923157, "learning_rate": 5.103700097597721e-05, "loss": 2.7051, "step": 6328 }, { "epoch": 0.5189853958493467, "grad_norm": 0.40716588497161865, "learning_rate": 5.1009893576154753e-05, "loss": 2.7446, "step": 6330 }, { "epoch": 0.519149372277735, "grad_norm": 0.3895430266857147, "learning_rate": 5.098278587937545e-05, "loss": 2.76, "step": 6332 }, { "epoch": 0.5193133487061234, "grad_norm": 0.4046901762485504, "learning_rate": 5.095567789361029e-05, "loss": 2.814, "step": 6334 }, { "epoch": 0.519477325134512, "grad_norm": 0.41053667664527893, "learning_rate": 5.092856962683028e-05, "loss": 2.7751, "step": 6336 }, { "epoch": 0.5196413015629003, "grad_norm": 0.4014565348625183, "learning_rate": 5.090146108700654e-05, "loss": 2.7611, "step": 6338 }, { "epoch": 0.5198052779912887, "grad_norm": 0.4204341471195221, "learning_rate": 5.087435228211032e-05, "loss": 2.7913, "step": 6340 }, { "epoch": 0.5199692544196771, "grad_norm": 0.4301351308822632, "learning_rate": 5.084724322011284e-05, "loss": 2.7799, "step": 6342 }, { "epoch": 0.5201332308480656, "grad_norm": 0.43886420130729675, "learning_rate": 5.082013390898549e-05, "loss": 2.7522, "step": 6344 }, { "epoch": 0.520297207276454, "grad_norm": 0.46510088443756104, "learning_rate": 5.07930243566997e-05, "loss": 2.8082, "step": 6346 }, { "epoch": 0.5204611837048424, "grad_norm": 0.4435691833496094, "learning_rate": 5.076591457122696e-05, "loss": 2.7263, "step": 6348 }, { "epoch": 0.5206251601332309, "grad_norm": 0.42445266246795654, "learning_rate": 5.073880456053882e-05, "loss": 2.7793, "step": 6350 }, { "epoch": 0.5207891365616193, "grad_norm": 0.3983137607574463, "learning_rate": 5.071169433260696e-05, "loss": 2.7799, "step": 6352 }, { "epoch": 0.5209531129900077, "grad_norm": 0.4298652708530426, "learning_rate": 5.0684583895403034e-05, "loss": 2.7705, "step": 6354 }, { "epoch": 0.5211170894183961, "grad_norm": 0.40018078684806824, "learning_rate": 5.065747325689884e-05, "loss": 2.7392, "step": 6356 }, { "epoch": 0.5212810658467846, "grad_norm": 0.3763446807861328, "learning_rate": 5.063036242506618e-05, "loss": 2.7786, "step": 6358 }, { "epoch": 0.521445042275173, "grad_norm": 0.4118928015232086, "learning_rate": 5.0603251407876915e-05, "loss": 2.7513, "step": 6360 }, { "epoch": 0.5216090187035614, "grad_norm": 0.45322826504707336, "learning_rate": 5.057614021330299e-05, "loss": 2.7825, "step": 6362 }, { "epoch": 0.5217729951319497, "grad_norm": 0.41894903779029846, "learning_rate": 5.0549028849316416e-05, "loss": 2.7983, "step": 6364 }, { "epoch": 0.5219369715603382, "grad_norm": 0.45578399300575256, "learning_rate": 5.052191732388919e-05, "loss": 2.7725, "step": 6366 }, { "epoch": 0.5221009479887266, "grad_norm": 0.4369242191314697, "learning_rate": 5.0494805644993394e-05, "loss": 2.7747, "step": 6368 }, { "epoch": 0.522264924417115, "grad_norm": 0.42365267872810364, "learning_rate": 5.046769382060116e-05, "loss": 2.7458, "step": 6370 }, { "epoch": 0.5224289008455034, "grad_norm": 0.43620315194129944, "learning_rate": 5.044058185868465e-05, "loss": 2.8222, "step": 6372 }, { "epoch": 0.5225928772738919, "grad_norm": 0.4382375478744507, "learning_rate": 5.0413469767216094e-05, "loss": 2.7856, "step": 6374 }, { "epoch": 0.5227568537022803, "grad_norm": 0.4498637616634369, "learning_rate": 5.0386357554167726e-05, "loss": 2.7856, "step": 6376 }, { "epoch": 0.5229208301306687, "grad_norm": 0.44776564836502075, "learning_rate": 5.035924522751182e-05, "loss": 2.7612, "step": 6378 }, { "epoch": 0.5230848065590571, "grad_norm": 0.4710599184036255, "learning_rate": 5.03321327952207e-05, "loss": 2.7498, "step": 6380 }, { "epoch": 0.5232487829874456, "grad_norm": 0.42744359374046326, "learning_rate": 5.030502026526671e-05, "loss": 2.7498, "step": 6382 }, { "epoch": 0.523412759415834, "grad_norm": 0.4643082320690155, "learning_rate": 5.0277907645622235e-05, "loss": 2.7945, "step": 6384 }, { "epoch": 0.5235767358442224, "grad_norm": 0.4620349109172821, "learning_rate": 5.025079494425966e-05, "loss": 2.8336, "step": 6386 }, { "epoch": 0.5237407122726108, "grad_norm": 0.4274936616420746, "learning_rate": 5.022368216915143e-05, "loss": 2.8171, "step": 6388 }, { "epoch": 0.5239046887009993, "grad_norm": 0.4141872823238373, "learning_rate": 5.019656932826999e-05, "loss": 2.7208, "step": 6390 }, { "epoch": 0.5240686651293877, "grad_norm": 0.4212776720523834, "learning_rate": 5.016945642958779e-05, "loss": 2.818, "step": 6392 }, { "epoch": 0.524232641557776, "grad_norm": 0.41134974360466003, "learning_rate": 5.0142343481077336e-05, "loss": 2.743, "step": 6394 }, { "epoch": 0.5243966179861644, "grad_norm": 0.40472692251205444, "learning_rate": 5.011523049071111e-05, "loss": 2.787, "step": 6396 }, { "epoch": 0.5245605944145529, "grad_norm": 0.42915216088294983, "learning_rate": 5.008811746646161e-05, "loss": 2.8417, "step": 6398 }, { "epoch": 0.5247245708429413, "grad_norm": 0.4095515310764313, "learning_rate": 5.0061004416301405e-05, "loss": 2.7547, "step": 6400 }, { "epoch": 0.5248885472713297, "grad_norm": 0.3783782720565796, "learning_rate": 5.003389134820298e-05, "loss": 2.717, "step": 6402 }, { "epoch": 0.5250525236997182, "grad_norm": 0.3920300602912903, "learning_rate": 5.0006778270138875e-05, "loss": 2.7286, "step": 6404 }, { "epoch": 0.5252165001281066, "grad_norm": 0.3948673605918884, "learning_rate": 4.997966519008165e-05, "loss": 2.8211, "step": 6406 }, { "epoch": 0.525380476556495, "grad_norm": 0.39030882716178894, "learning_rate": 4.995255211600383e-05, "loss": 2.7395, "step": 6408 }, { "epoch": 0.5255444529848834, "grad_norm": 0.383803129196167, "learning_rate": 4.9925439055877945e-05, "loss": 2.6929, "step": 6410 }, { "epoch": 0.5257084294132719, "grad_norm": 0.4085908830165863, "learning_rate": 4.989832601767653e-05, "loss": 2.7877, "step": 6412 }, { "epoch": 0.5258724058416603, "grad_norm": 0.361942321062088, "learning_rate": 4.9871213009372115e-05, "loss": 2.7773, "step": 6414 }, { "epoch": 0.5260363822700487, "grad_norm": 0.39183202385902405, "learning_rate": 4.9844100038937216e-05, "loss": 2.8114, "step": 6416 }, { "epoch": 0.5262003586984371, "grad_norm": 0.41694483160972595, "learning_rate": 4.98169871143443e-05, "loss": 2.7261, "step": 6418 }, { "epoch": 0.5263643351268256, "grad_norm": 0.4080658555030823, "learning_rate": 4.9789874243565935e-05, "loss": 2.7013, "step": 6420 }, { "epoch": 0.526528311555214, "grad_norm": 0.4062003791332245, "learning_rate": 4.976276143457454e-05, "loss": 2.7662, "step": 6422 }, { "epoch": 0.5266922879836023, "grad_norm": 0.4070029556751251, "learning_rate": 4.973564869534257e-05, "loss": 2.735, "step": 6424 }, { "epoch": 0.5268562644119907, "grad_norm": 0.397651731967926, "learning_rate": 4.97085360338425e-05, "loss": 2.6623, "step": 6426 }, { "epoch": 0.5270202408403792, "grad_norm": 0.41555312275886536, "learning_rate": 4.9681423458046725e-05, "loss": 2.7732, "step": 6428 }, { "epoch": 0.5271842172687676, "grad_norm": 0.3885171413421631, "learning_rate": 4.965431097592761e-05, "loss": 2.8271, "step": 6430 }, { "epoch": 0.527348193697156, "grad_norm": 0.37522533535957336, "learning_rate": 4.962719859545756e-05, "loss": 2.7542, "step": 6432 }, { "epoch": 0.5275121701255444, "grad_norm": 0.3871397376060486, "learning_rate": 4.96000863246089e-05, "loss": 2.7809, "step": 6434 }, { "epoch": 0.5276761465539329, "grad_norm": 0.42370837926864624, "learning_rate": 4.9572974171353895e-05, "loss": 2.766, "step": 6436 }, { "epoch": 0.5278401229823213, "grad_norm": 0.40288394689559937, "learning_rate": 4.954586214366484e-05, "loss": 2.7893, "step": 6438 }, { "epoch": 0.5280040994107097, "grad_norm": 0.3945050835609436, "learning_rate": 4.951875024951396e-05, "loss": 2.8174, "step": 6440 }, { "epoch": 0.5281680758390981, "grad_norm": 0.382445752620697, "learning_rate": 4.949163849687341e-05, "loss": 2.7378, "step": 6442 }, { "epoch": 0.5283320522674866, "grad_norm": 0.3933001756668091, "learning_rate": 4.946452689371539e-05, "loss": 2.7724, "step": 6444 }, { "epoch": 0.528496028695875, "grad_norm": 0.44155430793762207, "learning_rate": 4.943741544801198e-05, "loss": 2.7414, "step": 6446 }, { "epoch": 0.5286600051242634, "grad_norm": 0.41750699281692505, "learning_rate": 4.941030416773521e-05, "loss": 2.7565, "step": 6448 }, { "epoch": 0.5288239815526518, "grad_norm": 0.41295358538627625, "learning_rate": 4.9383193060857135e-05, "loss": 2.7785, "step": 6450 }, { "epoch": 0.5289879579810403, "grad_norm": 0.43917882442474365, "learning_rate": 4.9356082135349696e-05, "loss": 2.7086, "step": 6452 }, { "epoch": 0.5291519344094286, "grad_norm": 0.41664546728134155, "learning_rate": 4.932897139918478e-05, "loss": 2.7405, "step": 6454 }, { "epoch": 0.529315910837817, "grad_norm": 0.4118877649307251, "learning_rate": 4.930186086033424e-05, "loss": 2.7359, "step": 6456 }, { "epoch": 0.5294798872662054, "grad_norm": 0.4057008624076843, "learning_rate": 4.9274750526769886e-05, "loss": 2.7382, "step": 6458 }, { "epoch": 0.5296438636945939, "grad_norm": 0.39607709646224976, "learning_rate": 4.9247640406463426e-05, "loss": 2.7757, "step": 6460 }, { "epoch": 0.5298078401229823, "grad_norm": 0.3914690911769867, "learning_rate": 4.922053050738655e-05, "loss": 2.7573, "step": 6462 }, { "epoch": 0.5299718165513707, "grad_norm": 0.40739142894744873, "learning_rate": 4.919342083751084e-05, "loss": 2.7704, "step": 6464 }, { "epoch": 0.5301357929797592, "grad_norm": 0.43088585138320923, "learning_rate": 4.916631140480782e-05, "loss": 2.7606, "step": 6466 }, { "epoch": 0.5302997694081476, "grad_norm": 0.4455958604812622, "learning_rate": 4.913920221724899e-05, "loss": 2.7608, "step": 6468 }, { "epoch": 0.530463745836536, "grad_norm": 0.4500090777873993, "learning_rate": 4.911209328280573e-05, "loss": 2.7754, "step": 6470 }, { "epoch": 0.5306277222649244, "grad_norm": 0.43248435854911804, "learning_rate": 4.9084984609449324e-05, "loss": 2.8032, "step": 6472 }, { "epoch": 0.5307916986933129, "grad_norm": 0.443559467792511, "learning_rate": 4.9057876205151066e-05, "loss": 2.7396, "step": 6474 }, { "epoch": 0.5309556751217013, "grad_norm": 0.41736772656440735, "learning_rate": 4.90307680778821e-05, "loss": 2.7462, "step": 6476 }, { "epoch": 0.5311196515500897, "grad_norm": 0.4045504629611969, "learning_rate": 4.9003660235613494e-05, "loss": 2.7085, "step": 6478 }, { "epoch": 0.5312836279784781, "grad_norm": 0.4106195867061615, "learning_rate": 4.897655268631624e-05, "loss": 2.7858, "step": 6480 }, { "epoch": 0.5314476044068666, "grad_norm": 0.4442145526409149, "learning_rate": 4.894944543796129e-05, "loss": 2.7608, "step": 6482 }, { "epoch": 0.531611580835255, "grad_norm": 0.41611093282699585, "learning_rate": 4.892233849851941e-05, "loss": 2.8305, "step": 6484 }, { "epoch": 0.5317755572636433, "grad_norm": 0.39070653915405273, "learning_rate": 4.889523187596139e-05, "loss": 2.7433, "step": 6486 }, { "epoch": 0.5319395336920317, "grad_norm": 0.40593773126602173, "learning_rate": 4.8868125578257835e-05, "loss": 2.7716, "step": 6488 }, { "epoch": 0.5321035101204202, "grad_norm": 0.3786163032054901, "learning_rate": 4.884101961337927e-05, "loss": 2.7912, "step": 6490 }, { "epoch": 0.5322674865488086, "grad_norm": 0.37940514087677, "learning_rate": 4.881391398929619e-05, "loss": 2.8256, "step": 6492 }, { "epoch": 0.532431462977197, "grad_norm": 0.3631497323513031, "learning_rate": 4.8786808713978905e-05, "loss": 2.7599, "step": 6494 }, { "epoch": 0.5325954394055854, "grad_norm": 0.407393217086792, "learning_rate": 4.875970379539765e-05, "loss": 2.7604, "step": 6496 }, { "epoch": 0.5327594158339739, "grad_norm": 0.40873128175735474, "learning_rate": 4.87325992415226e-05, "loss": 2.7488, "step": 6498 }, { "epoch": 0.5329233922623623, "grad_norm": 0.37873297929763794, "learning_rate": 4.8705495060323765e-05, "loss": 2.705, "step": 6500 }, { "epoch": 0.5330873686907507, "grad_norm": 0.3927375078201294, "learning_rate": 4.867839125977105e-05, "loss": 2.7707, "step": 6502 }, { "epoch": 0.5332513451191391, "grad_norm": 0.3866179287433624, "learning_rate": 4.8651287847834285e-05, "loss": 2.8109, "step": 6504 }, { "epoch": 0.5334153215475276, "grad_norm": 0.42081525921821594, "learning_rate": 4.862418483248316e-05, "loss": 2.7731, "step": 6506 }, { "epoch": 0.533579297975916, "grad_norm": 0.4573631286621094, "learning_rate": 4.859708222168724e-05, "loss": 2.7307, "step": 6508 }, { "epoch": 0.5337432744043044, "grad_norm": 0.4063120186328888, "learning_rate": 4.856998002341601e-05, "loss": 2.7532, "step": 6510 }, { "epoch": 0.5339072508326927, "grad_norm": 0.41616347432136536, "learning_rate": 4.8542878245638795e-05, "loss": 2.7735, "step": 6512 }, { "epoch": 0.5340712272610812, "grad_norm": 0.4180244207382202, "learning_rate": 4.8515776896324784e-05, "loss": 2.7492, "step": 6514 }, { "epoch": 0.5342352036894696, "grad_norm": 0.4002653658390045, "learning_rate": 4.848867598344311e-05, "loss": 2.737, "step": 6516 }, { "epoch": 0.534399180117858, "grad_norm": 0.3756474554538727, "learning_rate": 4.846157551496271e-05, "loss": 2.765, "step": 6518 }, { "epoch": 0.5345631565462465, "grad_norm": 0.3449940085411072, "learning_rate": 4.8434475498852394e-05, "loss": 2.7273, "step": 6520 }, { "epoch": 0.5347271329746349, "grad_norm": 0.3994511365890503, "learning_rate": 4.84073759430809e-05, "loss": 2.7916, "step": 6522 }, { "epoch": 0.5348911094030233, "grad_norm": 0.4182688295841217, "learning_rate": 4.8380276855616755e-05, "loss": 2.7951, "step": 6524 }, { "epoch": 0.5350550858314117, "grad_norm": 0.40519729256629944, "learning_rate": 4.8353178244428395e-05, "loss": 2.7612, "step": 6526 }, { "epoch": 0.5352190622598002, "grad_norm": 0.3876676857471466, "learning_rate": 4.832608011748411e-05, "loss": 2.717, "step": 6528 }, { "epoch": 0.5353830386881886, "grad_norm": 0.3639967143535614, "learning_rate": 4.829898248275203e-05, "loss": 2.7299, "step": 6530 }, { "epoch": 0.535547015116577, "grad_norm": 0.3999374508857727, "learning_rate": 4.827188534820013e-05, "loss": 2.7423, "step": 6532 }, { "epoch": 0.5357109915449654, "grad_norm": 0.3990139365196228, "learning_rate": 4.824478872179631e-05, "loss": 2.7551, "step": 6534 }, { "epoch": 0.5358749679733539, "grad_norm": 0.38419950008392334, "learning_rate": 4.821769261150823e-05, "loss": 2.7131, "step": 6536 }, { "epoch": 0.5360389444017423, "grad_norm": 0.3976088762283325, "learning_rate": 4.819059702530343e-05, "loss": 2.766, "step": 6538 }, { "epoch": 0.5362029208301307, "grad_norm": 0.45600828528404236, "learning_rate": 4.816350197114935e-05, "loss": 2.7843, "step": 6540 }, { "epoch": 0.536366897258519, "grad_norm": 0.4235410988330841, "learning_rate": 4.813640745701319e-05, "loss": 2.7852, "step": 6542 }, { "epoch": 0.5365308736869076, "grad_norm": 0.473484069108963, "learning_rate": 4.810931349086202e-05, "loss": 2.7651, "step": 6544 }, { "epoch": 0.5366948501152959, "grad_norm": 0.4139662981033325, "learning_rate": 4.80822200806628e-05, "loss": 2.7612, "step": 6546 }, { "epoch": 0.5368588265436843, "grad_norm": 0.3821699917316437, "learning_rate": 4.8055127234382243e-05, "loss": 2.7275, "step": 6548 }, { "epoch": 0.5370228029720727, "grad_norm": 0.40968820452690125, "learning_rate": 4.8028034959986954e-05, "loss": 2.826, "step": 6550 }, { "epoch": 0.5371867794004612, "grad_norm": 0.41920238733291626, "learning_rate": 4.8000943265443355e-05, "loss": 2.7462, "step": 6552 }, { "epoch": 0.5373507558288496, "grad_norm": 0.41442564129829407, "learning_rate": 4.797385215871769e-05, "loss": 2.7687, "step": 6554 }, { "epoch": 0.537514732257238, "grad_norm": 0.40692684054374695, "learning_rate": 4.7946761647776015e-05, "loss": 2.7562, "step": 6556 }, { "epoch": 0.5376787086856264, "grad_norm": 0.41064393520355225, "learning_rate": 4.791967174058428e-05, "loss": 2.7402, "step": 6558 }, { "epoch": 0.5378426851140149, "grad_norm": 0.3895728290081024, "learning_rate": 4.789258244510817e-05, "loss": 2.7531, "step": 6560 }, { "epoch": 0.5380066615424033, "grad_norm": 0.3740815222263336, "learning_rate": 4.786549376931322e-05, "loss": 2.7991, "step": 6562 }, { "epoch": 0.5381706379707917, "grad_norm": 0.39689722657203674, "learning_rate": 4.7838405721164833e-05, "loss": 2.743, "step": 6564 }, { "epoch": 0.5383346143991801, "grad_norm": 0.38677778840065, "learning_rate": 4.7811318308628154e-05, "loss": 2.7792, "step": 6566 }, { "epoch": 0.5384985908275686, "grad_norm": 0.3950054943561554, "learning_rate": 4.7784231539668164e-05, "loss": 2.7371, "step": 6568 }, { "epoch": 0.538662567255957, "grad_norm": 0.4083583950996399, "learning_rate": 4.775714542224971e-05, "loss": 2.7824, "step": 6570 }, { "epoch": 0.5388265436843453, "grad_norm": 0.3985320031642914, "learning_rate": 4.773005996433737e-05, "loss": 2.7895, "step": 6572 }, { "epoch": 0.5389905201127337, "grad_norm": 0.37357017397880554, "learning_rate": 4.7702975173895544e-05, "loss": 2.7912, "step": 6574 }, { "epoch": 0.5391544965411222, "grad_norm": 0.40611863136291504, "learning_rate": 4.767589105888849e-05, "loss": 2.6963, "step": 6576 }, { "epoch": 0.5393184729695106, "grad_norm": 0.39386776089668274, "learning_rate": 4.7648807627280206e-05, "loss": 2.7935, "step": 6578 }, { "epoch": 0.539482449397899, "grad_norm": 0.4086373448371887, "learning_rate": 4.762172488703449e-05, "loss": 2.786, "step": 6580 }, { "epoch": 0.5396464258262875, "grad_norm": 0.40746963024139404, "learning_rate": 4.7594642846114995e-05, "loss": 2.7644, "step": 6582 }, { "epoch": 0.5398104022546759, "grad_norm": 0.38858145475387573, "learning_rate": 4.756756151248512e-05, "loss": 2.7503, "step": 6584 }, { "epoch": 0.5399743786830643, "grad_norm": 0.38517722487449646, "learning_rate": 4.754048089410805e-05, "loss": 2.8053, "step": 6586 }, { "epoch": 0.5401383551114527, "grad_norm": 0.3999423086643219, "learning_rate": 4.7513400998946814e-05, "loss": 2.7599, "step": 6588 }, { "epoch": 0.5403023315398412, "grad_norm": 0.4180408716201782, "learning_rate": 4.748632183496416e-05, "loss": 2.779, "step": 6590 }, { "epoch": 0.5404663079682296, "grad_norm": 0.4267200231552124, "learning_rate": 4.745924341012266e-05, "loss": 2.7721, "step": 6592 }, { "epoch": 0.540630284396618, "grad_norm": 0.42305755615234375, "learning_rate": 4.743216573238466e-05, "loss": 2.7375, "step": 6594 }, { "epoch": 0.5407942608250064, "grad_norm": 0.41102567315101624, "learning_rate": 4.7405088809712294e-05, "loss": 2.7693, "step": 6596 }, { "epoch": 0.5409582372533949, "grad_norm": 0.40447738766670227, "learning_rate": 4.737801265006746e-05, "loss": 2.7783, "step": 6598 }, { "epoch": 0.5411222136817833, "grad_norm": 0.37356311082839966, "learning_rate": 4.735093726141185e-05, "loss": 2.7405, "step": 6600 }, { "epoch": 0.5412861901101716, "grad_norm": 0.3971530795097351, "learning_rate": 4.7323862651706925e-05, "loss": 2.7306, "step": 6602 }, { "epoch": 0.54145016653856, "grad_norm": 0.4356495141983032, "learning_rate": 4.729678882891386e-05, "loss": 2.8043, "step": 6604 }, { "epoch": 0.5416141429669485, "grad_norm": 0.41701847314834595, "learning_rate": 4.726971580099372e-05, "loss": 2.7598, "step": 6606 }, { "epoch": 0.5417781193953369, "grad_norm": 0.4182874262332916, "learning_rate": 4.724264357590722e-05, "loss": 2.7265, "step": 6608 }, { "epoch": 0.5419420958237253, "grad_norm": 0.3866254687309265, "learning_rate": 4.721557216161488e-05, "loss": 2.7641, "step": 6610 }, { "epoch": 0.5421060722521137, "grad_norm": 0.3859403431415558, "learning_rate": 4.718850156607702e-05, "loss": 2.7343, "step": 6612 }, { "epoch": 0.5422700486805022, "grad_norm": 0.3834199905395508, "learning_rate": 4.716143179725367e-05, "loss": 2.7267, "step": 6614 }, { "epoch": 0.5424340251088906, "grad_norm": 0.3715173006057739, "learning_rate": 4.713436286310461e-05, "loss": 2.8098, "step": 6616 }, { "epoch": 0.542598001537279, "grad_norm": 0.3994891047477722, "learning_rate": 4.7107294771589416e-05, "loss": 2.686, "step": 6618 }, { "epoch": 0.5427619779656674, "grad_norm": 0.4067355692386627, "learning_rate": 4.7080227530667406e-05, "loss": 2.7673, "step": 6620 }, { "epoch": 0.5429259543940559, "grad_norm": 0.45450058579444885, "learning_rate": 4.7053161148297626e-05, "loss": 2.7531, "step": 6622 }, { "epoch": 0.5430899308224443, "grad_norm": 0.4142906963825226, "learning_rate": 4.7026095632438885e-05, "loss": 2.7542, "step": 6624 }, { "epoch": 0.5432539072508327, "grad_norm": 0.43223828077316284, "learning_rate": 4.6999030991049735e-05, "loss": 2.7488, "step": 6626 }, { "epoch": 0.5434178836792211, "grad_norm": 0.4043966233730316, "learning_rate": 4.697196723208845e-05, "loss": 2.7523, "step": 6628 }, { "epoch": 0.5435818601076096, "grad_norm": 0.40228337049484253, "learning_rate": 4.6944904363513096e-05, "loss": 2.7256, "step": 6630 }, { "epoch": 0.543745836535998, "grad_norm": 0.4250689446926117, "learning_rate": 4.6917842393281444e-05, "loss": 2.8177, "step": 6632 }, { "epoch": 0.5439098129643863, "grad_norm": 0.432136207818985, "learning_rate": 4.6890781329350963e-05, "loss": 2.741, "step": 6634 }, { "epoch": 0.5440737893927748, "grad_norm": 0.4338907301425934, "learning_rate": 4.686372117967895e-05, "loss": 2.7243, "step": 6636 }, { "epoch": 0.5442377658211632, "grad_norm": 0.4134625494480133, "learning_rate": 4.683666195222235e-05, "loss": 2.7458, "step": 6638 }, { "epoch": 0.5444017422495516, "grad_norm": 0.39210569858551025, "learning_rate": 4.680960365493785e-05, "loss": 2.7762, "step": 6640 }, { "epoch": 0.54456571867794, "grad_norm": 0.4027068614959717, "learning_rate": 4.6782546295781914e-05, "loss": 2.7606, "step": 6642 }, { "epoch": 0.5447296951063285, "grad_norm": 0.40476635098457336, "learning_rate": 4.6755489882710666e-05, "loss": 2.7356, "step": 6644 }, { "epoch": 0.5448936715347169, "grad_norm": 0.438758909702301, "learning_rate": 4.672843442367999e-05, "loss": 2.7849, "step": 6646 }, { "epoch": 0.5450576479631053, "grad_norm": 0.3990931212902069, "learning_rate": 4.6701379926645495e-05, "loss": 2.7732, "step": 6648 }, { "epoch": 0.5452216243914937, "grad_norm": 0.457121878862381, "learning_rate": 4.6674326399562476e-05, "loss": 2.8191, "step": 6650 }, { "epoch": 0.5453856008198822, "grad_norm": 0.43321701884269714, "learning_rate": 4.664727385038594e-05, "loss": 2.7818, "step": 6652 }, { "epoch": 0.5455495772482706, "grad_norm": 0.43147024512290955, "learning_rate": 4.662022228707067e-05, "loss": 2.7077, "step": 6654 }, { "epoch": 0.545713553676659, "grad_norm": 0.45424506068229675, "learning_rate": 4.65931717175711e-05, "loss": 2.7434, "step": 6656 }, { "epoch": 0.5458775301050474, "grad_norm": 0.4286510646343231, "learning_rate": 4.656612214984135e-05, "loss": 2.7395, "step": 6658 }, { "epoch": 0.5460415065334359, "grad_norm": 0.38945695757865906, "learning_rate": 4.653907359183533e-05, "loss": 2.7433, "step": 6660 }, { "epoch": 0.5462054829618243, "grad_norm": 0.418412983417511, "learning_rate": 4.651202605150658e-05, "loss": 2.7534, "step": 6662 }, { "epoch": 0.5463694593902126, "grad_norm": 0.4252218008041382, "learning_rate": 4.648497953680838e-05, "loss": 2.7365, "step": 6664 }, { "epoch": 0.546533435818601, "grad_norm": 0.4022780656814575, "learning_rate": 4.6457934055693684e-05, "loss": 2.7434, "step": 6666 }, { "epoch": 0.5466974122469895, "grad_norm": 0.4030002951622009, "learning_rate": 4.643088961611513e-05, "loss": 2.7049, "step": 6668 }, { "epoch": 0.5468613886753779, "grad_norm": 0.401536226272583, "learning_rate": 4.640384622602512e-05, "loss": 2.7702, "step": 6670 }, { "epoch": 0.5470253651037663, "grad_norm": 0.4336666762828827, "learning_rate": 4.637680389337567e-05, "loss": 2.7814, "step": 6672 }, { "epoch": 0.5471893415321547, "grad_norm": 0.4129686951637268, "learning_rate": 4.6349762626118505e-05, "loss": 2.7637, "step": 6674 }, { "epoch": 0.5473533179605432, "grad_norm": 0.41660234332084656, "learning_rate": 4.6322722432205076e-05, "loss": 2.731, "step": 6676 }, { "epoch": 0.5475172943889316, "grad_norm": 0.40781137347221375, "learning_rate": 4.629568331958647e-05, "loss": 2.8013, "step": 6678 }, { "epoch": 0.54768127081732, "grad_norm": 0.38069507479667664, "learning_rate": 4.626864529621346e-05, "loss": 2.7777, "step": 6680 }, { "epoch": 0.5478452472457084, "grad_norm": 0.37233811616897583, "learning_rate": 4.624160837003655e-05, "loss": 2.7827, "step": 6682 }, { "epoch": 0.5480092236740969, "grad_norm": 0.4252106845378876, "learning_rate": 4.621457254900586e-05, "loss": 2.7698, "step": 6684 }, { "epoch": 0.5481732001024853, "grad_norm": 0.42072904109954834, "learning_rate": 4.618753784107121e-05, "loss": 2.8281, "step": 6686 }, { "epoch": 0.5483371765308737, "grad_norm": 0.39454519748687744, "learning_rate": 4.61605042541821e-05, "loss": 2.739, "step": 6688 }, { "epoch": 0.5485011529592622, "grad_norm": 0.40513092279434204, "learning_rate": 4.61334717962877e-05, "loss": 2.7624, "step": 6690 }, { "epoch": 0.5486651293876506, "grad_norm": 0.4346267580986023, "learning_rate": 4.6106440475336796e-05, "loss": 2.7803, "step": 6692 }, { "epoch": 0.5488291058160389, "grad_norm": 0.42634353041648865, "learning_rate": 4.6079410299277955e-05, "loss": 2.7594, "step": 6694 }, { "epoch": 0.5489930822444273, "grad_norm": 0.3908812701702118, "learning_rate": 4.605238127605929e-05, "loss": 2.7085, "step": 6696 }, { "epoch": 0.5491570586728158, "grad_norm": 0.42012181878089905, "learning_rate": 4.602535341362861e-05, "loss": 2.7227, "step": 6698 }, { "epoch": 0.5493210351012042, "grad_norm": 0.42639926075935364, "learning_rate": 4.599832671993344e-05, "loss": 2.7423, "step": 6700 }, { "epoch": 0.5494850115295926, "grad_norm": 0.3801124393939972, "learning_rate": 4.597130120292089e-05, "loss": 2.7533, "step": 6702 }, { "epoch": 0.549648987957981, "grad_norm": 0.3986124098300934, "learning_rate": 4.5944276870537747e-05, "loss": 2.7356, "step": 6704 }, { "epoch": 0.5498129643863695, "grad_norm": 0.40717771649360657, "learning_rate": 4.5917253730730444e-05, "loss": 2.7659, "step": 6706 }, { "epoch": 0.5499769408147579, "grad_norm": 0.3899606764316559, "learning_rate": 4.589023179144511e-05, "loss": 2.7652, "step": 6708 }, { "epoch": 0.5501409172431463, "grad_norm": 0.39462822675704956, "learning_rate": 4.586321106062744e-05, "loss": 2.7228, "step": 6710 }, { "epoch": 0.5503048936715347, "grad_norm": 0.4435010254383087, "learning_rate": 4.583619154622285e-05, "loss": 2.7398, "step": 6712 }, { "epoch": 0.5504688700999232, "grad_norm": 0.41980791091918945, "learning_rate": 4.580917325617634e-05, "loss": 2.7475, "step": 6714 }, { "epoch": 0.5506328465283116, "grad_norm": 0.41926372051239014, "learning_rate": 4.578215619843257e-05, "loss": 2.7747, "step": 6716 }, { "epoch": 0.5507968229567, "grad_norm": 0.40478911995887756, "learning_rate": 4.575514038093587e-05, "loss": 2.7238, "step": 6718 }, { "epoch": 0.5509607993850884, "grad_norm": 0.4250586926937103, "learning_rate": 4.572812581163017e-05, "loss": 2.7914, "step": 6720 }, { "epoch": 0.5511247758134769, "grad_norm": 0.3964104950428009, "learning_rate": 4.570111249845901e-05, "loss": 2.7644, "step": 6722 }, { "epoch": 0.5512887522418652, "grad_norm": 0.4339950978755951, "learning_rate": 4.567410044936564e-05, "loss": 2.72, "step": 6724 }, { "epoch": 0.5514527286702536, "grad_norm": 0.4455949366092682, "learning_rate": 4.564708967229286e-05, "loss": 2.7627, "step": 6726 }, { "epoch": 0.551616705098642, "grad_norm": 0.42049476504325867, "learning_rate": 4.5620080175183116e-05, "loss": 2.7423, "step": 6728 }, { "epoch": 0.5517806815270305, "grad_norm": 0.4246431887149811, "learning_rate": 4.559307196597852e-05, "loss": 2.7474, "step": 6730 }, { "epoch": 0.5519446579554189, "grad_norm": 0.45321446657180786, "learning_rate": 4.556606505262074e-05, "loss": 2.7179, "step": 6732 }, { "epoch": 0.5521086343838073, "grad_norm": 0.39583444595336914, "learning_rate": 4.553905944305111e-05, "loss": 2.7737, "step": 6734 }, { "epoch": 0.5522726108121957, "grad_norm": 0.39999717473983765, "learning_rate": 4.5512055145210576e-05, "loss": 2.7749, "step": 6736 }, { "epoch": 0.5524365872405842, "grad_norm": 0.3975955545902252, "learning_rate": 4.548505216703968e-05, "loss": 2.7756, "step": 6738 }, { "epoch": 0.5526005636689726, "grad_norm": 0.3958473205566406, "learning_rate": 4.545805051647855e-05, "loss": 2.752, "step": 6740 }, { "epoch": 0.552764540097361, "grad_norm": 0.37771138548851013, "learning_rate": 4.543105020146702e-05, "loss": 2.8027, "step": 6742 }, { "epoch": 0.5529285165257494, "grad_norm": 0.419717937707901, "learning_rate": 4.540405122994444e-05, "loss": 2.7716, "step": 6744 }, { "epoch": 0.5530924929541379, "grad_norm": 0.4406159818172455, "learning_rate": 4.5377053609849764e-05, "loss": 2.7907, "step": 6746 }, { "epoch": 0.5532564693825263, "grad_norm": 0.38063448667526245, "learning_rate": 4.535005734912163e-05, "loss": 2.7008, "step": 6748 }, { "epoch": 0.5534204458109147, "grad_norm": 0.440857857465744, "learning_rate": 4.532306245569821e-05, "loss": 2.748, "step": 6750 }, { "epoch": 0.5535844222393032, "grad_norm": 0.44015419483184814, "learning_rate": 4.529606893751727e-05, "loss": 2.7657, "step": 6752 }, { "epoch": 0.5537483986676915, "grad_norm": 0.4137510061264038, "learning_rate": 4.526907680251621e-05, "loss": 2.7373, "step": 6754 }, { "epoch": 0.5539123750960799, "grad_norm": 0.42115190625190735, "learning_rate": 4.524208605863198e-05, "loss": 2.7341, "step": 6756 }, { "epoch": 0.5540763515244683, "grad_norm": 0.42313051223754883, "learning_rate": 4.521509671380117e-05, "loss": 2.7291, "step": 6758 }, { "epoch": 0.5542403279528568, "grad_norm": 0.4206005930900574, "learning_rate": 4.518810877595993e-05, "loss": 2.7165, "step": 6760 }, { "epoch": 0.5544043043812452, "grad_norm": 0.41328859329223633, "learning_rate": 4.5161122253043995e-05, "loss": 2.748, "step": 6762 }, { "epoch": 0.5545682808096336, "grad_norm": 0.43345096707344055, "learning_rate": 4.513413715298867e-05, "loss": 2.7707, "step": 6764 }, { "epoch": 0.554732257238022, "grad_norm": 0.4347042143344879, "learning_rate": 4.51071534837289e-05, "loss": 2.7036, "step": 6766 }, { "epoch": 0.5548962336664105, "grad_norm": 0.41023269295692444, "learning_rate": 4.508017125319914e-05, "loss": 2.7155, "step": 6768 }, { "epoch": 0.5550602100947989, "grad_norm": 0.38797706365585327, "learning_rate": 4.505319046933344e-05, "loss": 2.7527, "step": 6770 }, { "epoch": 0.5552241865231873, "grad_norm": 0.40941694378852844, "learning_rate": 4.502621114006548e-05, "loss": 2.7829, "step": 6772 }, { "epoch": 0.5553881629515757, "grad_norm": 0.42043906450271606, "learning_rate": 4.4999233273328436e-05, "loss": 2.7298, "step": 6774 }, { "epoch": 0.5555521393799642, "grad_norm": 0.4022752344608307, "learning_rate": 4.497225687705509e-05, "loss": 2.7405, "step": 6776 }, { "epoch": 0.5557161158083526, "grad_norm": 0.4208052158355713, "learning_rate": 4.49452819591778e-05, "loss": 2.7314, "step": 6778 }, { "epoch": 0.555880092236741, "grad_norm": 0.45849183201789856, "learning_rate": 4.4918308527628466e-05, "loss": 2.7521, "step": 6780 }, { "epoch": 0.5560440686651293, "grad_norm": 0.41802626848220825, "learning_rate": 4.489133659033854e-05, "loss": 2.8101, "step": 6782 }, { "epoch": 0.5562080450935178, "grad_norm": 0.4206705391407013, "learning_rate": 4.4864366155239114e-05, "loss": 2.7493, "step": 6784 }, { "epoch": 0.5563720215219062, "grad_norm": 0.4085046052932739, "learning_rate": 4.483739723026075e-05, "loss": 2.7551, "step": 6786 }, { "epoch": 0.5565359979502946, "grad_norm": 0.44878071546554565, "learning_rate": 4.481042982333356e-05, "loss": 2.7041, "step": 6788 }, { "epoch": 0.556699974378683, "grad_norm": 0.48486924171447754, "learning_rate": 4.478346394238731e-05, "loss": 2.7416, "step": 6790 }, { "epoch": 0.5568639508070715, "grad_norm": 0.44023939967155457, "learning_rate": 4.475649959535123e-05, "loss": 2.755, "step": 6792 }, { "epoch": 0.5570279272354599, "grad_norm": 0.41545045375823975, "learning_rate": 4.472953679015409e-05, "loss": 2.7685, "step": 6794 }, { "epoch": 0.5571919036638483, "grad_norm": 0.4188261926174164, "learning_rate": 4.47025755347243e-05, "loss": 2.7642, "step": 6796 }, { "epoch": 0.5573558800922367, "grad_norm": 0.4364667534828186, "learning_rate": 4.4675615836989716e-05, "loss": 2.841, "step": 6798 }, { "epoch": 0.5575198565206252, "grad_norm": 0.4194522202014923, "learning_rate": 4.464865770487777e-05, "loss": 2.7053, "step": 6800 }, { "epoch": 0.5576838329490136, "grad_norm": 0.4182877242565155, "learning_rate": 4.462170114631546e-05, "loss": 2.7184, "step": 6802 }, { "epoch": 0.557847809377402, "grad_norm": 0.40800806879997253, "learning_rate": 4.459474616922928e-05, "loss": 2.7706, "step": 6804 }, { "epoch": 0.5580117858057905, "grad_norm": 0.39892709255218506, "learning_rate": 4.456779278154527e-05, "loss": 2.7453, "step": 6806 }, { "epoch": 0.5581757622341789, "grad_norm": 0.3947122097015381, "learning_rate": 4.454084099118904e-05, "loss": 2.7564, "step": 6808 }, { "epoch": 0.5583397386625673, "grad_norm": 0.3964519798755646, "learning_rate": 4.451389080608569e-05, "loss": 2.7603, "step": 6810 }, { "epoch": 0.5585037150909556, "grad_norm": 0.4413125514984131, "learning_rate": 4.448694223415983e-05, "loss": 2.7284, "step": 6812 }, { "epoch": 0.5586676915193441, "grad_norm": 0.40390545129776, "learning_rate": 4.445999528333567e-05, "loss": 2.6989, "step": 6814 }, { "epoch": 0.5588316679477325, "grad_norm": 0.41920092701911926, "learning_rate": 4.4433049961536874e-05, "loss": 2.718, "step": 6816 }, { "epoch": 0.5589956443761209, "grad_norm": 0.4399784207344055, "learning_rate": 4.4406106276686624e-05, "loss": 2.814, "step": 6818 }, { "epoch": 0.5591596208045093, "grad_norm": 0.45444971323013306, "learning_rate": 4.4379164236707706e-05, "loss": 2.7831, "step": 6820 }, { "epoch": 0.5593235972328978, "grad_norm": 0.42461079359054565, "learning_rate": 4.435222384952233e-05, "loss": 2.7525, "step": 6822 }, { "epoch": 0.5594875736612862, "grad_norm": 0.4275374710559845, "learning_rate": 4.4325285123052243e-05, "loss": 2.7201, "step": 6824 }, { "epoch": 0.5596515500896746, "grad_norm": 0.4361192584037781, "learning_rate": 4.429834806521874e-05, "loss": 2.7672, "step": 6826 }, { "epoch": 0.559815526518063, "grad_norm": 0.38457778096199036, "learning_rate": 4.427141268394258e-05, "loss": 2.7359, "step": 6828 }, { "epoch": 0.5599795029464515, "grad_norm": 0.3938714861869812, "learning_rate": 4.4244478987144034e-05, "loss": 2.7471, "step": 6830 }, { "epoch": 0.5601434793748399, "grad_norm": 0.398783802986145, "learning_rate": 4.421754698274294e-05, "loss": 2.8162, "step": 6832 }, { "epoch": 0.5603074558032283, "grad_norm": 0.4110495448112488, "learning_rate": 4.4190616678658566e-05, "loss": 2.7719, "step": 6834 }, { "epoch": 0.5604714322316167, "grad_norm": 0.43369707465171814, "learning_rate": 4.4163688082809674e-05, "loss": 2.7541, "step": 6836 }, { "epoch": 0.5606354086600052, "grad_norm": 0.4118701219558716, "learning_rate": 4.4136761203114605e-05, "loss": 2.7743, "step": 6838 }, { "epoch": 0.5607993850883936, "grad_norm": 0.38589438796043396, "learning_rate": 4.410983604749113e-05, "loss": 2.7358, "step": 6840 }, { "epoch": 0.5609633615167819, "grad_norm": 0.3737694025039673, "learning_rate": 4.408291262385651e-05, "loss": 2.733, "step": 6842 }, { "epoch": 0.5611273379451703, "grad_norm": 0.4078843593597412, "learning_rate": 4.405599094012752e-05, "loss": 2.75, "step": 6844 }, { "epoch": 0.5612913143735588, "grad_norm": 0.3856929838657379, "learning_rate": 4.402907100422043e-05, "loss": 2.6992, "step": 6846 }, { "epoch": 0.5614552908019472, "grad_norm": 0.4095956087112427, "learning_rate": 4.400215282405097e-05, "loss": 2.7023, "step": 6848 }, { "epoch": 0.5616192672303356, "grad_norm": 0.4438058137893677, "learning_rate": 4.397523640753438e-05, "loss": 2.683, "step": 6850 }, { "epoch": 0.561783243658724, "grad_norm": 0.4461391568183899, "learning_rate": 4.394832176258537e-05, "loss": 2.7345, "step": 6852 }, { "epoch": 0.5619472200871125, "grad_norm": 0.43370214104652405, "learning_rate": 4.392140889711809e-05, "loss": 2.7888, "step": 6854 }, { "epoch": 0.5621111965155009, "grad_norm": 0.4333142340183258, "learning_rate": 4.389449781904627e-05, "loss": 2.7502, "step": 6856 }, { "epoch": 0.5622751729438893, "grad_norm": 0.40552252531051636, "learning_rate": 4.386758853628301e-05, "loss": 2.791, "step": 6858 }, { "epoch": 0.5624391493722777, "grad_norm": 0.41940203309059143, "learning_rate": 4.3840681056740904e-05, "loss": 2.778, "step": 6860 }, { "epoch": 0.5626031258006662, "grad_norm": 0.3927070200443268, "learning_rate": 4.3813775388332076e-05, "loss": 2.7979, "step": 6862 }, { "epoch": 0.5627671022290546, "grad_norm": 0.4172367751598358, "learning_rate": 4.3786871538968054e-05, "loss": 2.8007, "step": 6864 }, { "epoch": 0.562931078657443, "grad_norm": 0.4077468514442444, "learning_rate": 4.375996951655984e-05, "loss": 2.7384, "step": 6866 }, { "epoch": 0.5630950550858315, "grad_norm": 0.40657129883766174, "learning_rate": 4.373306932901794e-05, "loss": 2.7395, "step": 6868 }, { "epoch": 0.5632590315142199, "grad_norm": 0.37659966945648193, "learning_rate": 4.370617098425226e-05, "loss": 2.7772, "step": 6870 }, { "epoch": 0.5634230079426082, "grad_norm": 0.38550299406051636, "learning_rate": 4.367927449017221e-05, "loss": 2.7439, "step": 6872 }, { "epoch": 0.5635869843709966, "grad_norm": 0.3852583169937134, "learning_rate": 4.365237985468664e-05, "loss": 2.7548, "step": 6874 }, { "epoch": 0.5637509607993851, "grad_norm": 0.40112239122390747, "learning_rate": 4.362548708570386e-05, "loss": 2.7937, "step": 6876 }, { "epoch": 0.5639149372277735, "grad_norm": 0.3871251344680786, "learning_rate": 4.35985961911316e-05, "loss": 2.7476, "step": 6878 }, { "epoch": 0.5640789136561619, "grad_norm": 0.39240968227386475, "learning_rate": 4.3571707178877096e-05, "loss": 2.7073, "step": 6880 }, { "epoch": 0.5642428900845503, "grad_norm": 0.3936747610569, "learning_rate": 4.3544820056846995e-05, "loss": 2.7616, "step": 6882 }, { "epoch": 0.5644068665129388, "grad_norm": 0.3985251784324646, "learning_rate": 4.351793483294736e-05, "loss": 2.753, "step": 6884 }, { "epoch": 0.5645708429413272, "grad_norm": 0.4452661871910095, "learning_rate": 4.349105151508377e-05, "loss": 2.7652, "step": 6886 }, { "epoch": 0.5647348193697156, "grad_norm": 0.431638240814209, "learning_rate": 4.346417011116118e-05, "loss": 2.8226, "step": 6888 }, { "epoch": 0.564898795798104, "grad_norm": 0.4476519525051117, "learning_rate": 4.3437290629084e-05, "loss": 2.7056, "step": 6890 }, { "epoch": 0.5650627722264925, "grad_norm": 0.39053019881248474, "learning_rate": 4.34104130767561e-05, "loss": 2.7887, "step": 6892 }, { "epoch": 0.5652267486548809, "grad_norm": 0.3967573046684265, "learning_rate": 4.338353746208073e-05, "loss": 2.7063, "step": 6894 }, { "epoch": 0.5653907250832693, "grad_norm": 0.3882686197757721, "learning_rate": 4.335666379296062e-05, "loss": 2.7154, "step": 6896 }, { "epoch": 0.5655547015116577, "grad_norm": 0.4172016680240631, "learning_rate": 4.3329792077297914e-05, "loss": 2.7593, "step": 6898 }, { "epoch": 0.5657186779400462, "grad_norm": 0.4219083786010742, "learning_rate": 4.330292232299417e-05, "loss": 2.7654, "step": 6900 }, { "epoch": 0.5658826543684345, "grad_norm": 0.3960023820400238, "learning_rate": 4.327605453795036e-05, "loss": 2.7306, "step": 6902 }, { "epoch": 0.5660466307968229, "grad_norm": 0.40032973885536194, "learning_rate": 4.324918873006692e-05, "loss": 2.7474, "step": 6904 }, { "epoch": 0.5662106072252113, "grad_norm": 0.36831405758857727, "learning_rate": 4.322232490724367e-05, "loss": 2.7406, "step": 6906 }, { "epoch": 0.5663745836535998, "grad_norm": 0.395846962928772, "learning_rate": 4.319546307737983e-05, "loss": 2.7481, "step": 6908 }, { "epoch": 0.5665385600819882, "grad_norm": 0.4093070328235626, "learning_rate": 4.3168603248374096e-05, "loss": 2.6996, "step": 6910 }, { "epoch": 0.5667025365103766, "grad_norm": 0.4207175374031067, "learning_rate": 4.314174542812452e-05, "loss": 2.7349, "step": 6912 }, { "epoch": 0.566866512938765, "grad_norm": 0.3914620578289032, "learning_rate": 4.311488962452857e-05, "loss": 2.7575, "step": 6914 }, { "epoch": 0.5670304893671535, "grad_norm": 0.3748440146446228, "learning_rate": 4.3088035845483155e-05, "loss": 2.7483, "step": 6916 }, { "epoch": 0.5671944657955419, "grad_norm": 0.3811940550804138, "learning_rate": 4.306118409888455e-05, "loss": 2.7513, "step": 6918 }, { "epoch": 0.5673584422239303, "grad_norm": 0.3829500675201416, "learning_rate": 4.303433439262843e-05, "loss": 2.7502, "step": 6920 }, { "epoch": 0.5675224186523188, "grad_norm": 0.4123048782348633, "learning_rate": 4.300748673460993e-05, "loss": 2.754, "step": 6922 }, { "epoch": 0.5676863950807072, "grad_norm": 0.38617756962776184, "learning_rate": 4.298064113272353e-05, "loss": 2.7199, "step": 6924 }, { "epoch": 0.5678503715090956, "grad_norm": 0.37602609395980835, "learning_rate": 4.2953797594863074e-05, "loss": 2.727, "step": 6926 }, { "epoch": 0.568014347937484, "grad_norm": 0.40039005875587463, "learning_rate": 4.2926956128921895e-05, "loss": 2.8184, "step": 6928 }, { "epoch": 0.5681783243658725, "grad_norm": 0.3854617178440094, "learning_rate": 4.290011674279264e-05, "loss": 2.77, "step": 6930 }, { "epoch": 0.5683423007942608, "grad_norm": 0.3800494968891144, "learning_rate": 4.2873279444367346e-05, "loss": 2.7059, "step": 6932 }, { "epoch": 0.5685062772226492, "grad_norm": 0.3799922466278076, "learning_rate": 4.28464442415375e-05, "loss": 2.7154, "step": 6934 }, { "epoch": 0.5686702536510376, "grad_norm": 0.39791008830070496, "learning_rate": 4.281961114219392e-05, "loss": 2.7433, "step": 6936 }, { "epoch": 0.5688342300794261, "grad_norm": 0.3874998092651367, "learning_rate": 4.279278015422678e-05, "loss": 2.7691, "step": 6938 }, { "epoch": 0.5689982065078145, "grad_norm": 0.40580660104751587, "learning_rate": 4.276595128552572e-05, "loss": 2.7029, "step": 6940 }, { "epoch": 0.5691621829362029, "grad_norm": 0.4420451521873474, "learning_rate": 4.273912454397968e-05, "loss": 2.7574, "step": 6942 }, { "epoch": 0.5693261593645913, "grad_norm": 0.42256930470466614, "learning_rate": 4.2712299937476976e-05, "loss": 2.7133, "step": 6944 }, { "epoch": 0.5694901357929798, "grad_norm": 0.414419949054718, "learning_rate": 4.2685477473905366e-05, "loss": 2.7022, "step": 6946 }, { "epoch": 0.5696541122213682, "grad_norm": 0.40276476740837097, "learning_rate": 4.265865716115191e-05, "loss": 2.7375, "step": 6948 }, { "epoch": 0.5698180886497566, "grad_norm": 0.4238226115703583, "learning_rate": 4.263183900710304e-05, "loss": 2.7346, "step": 6950 }, { "epoch": 0.569982065078145, "grad_norm": 0.41304221749305725, "learning_rate": 4.260502301964462e-05, "loss": 2.7534, "step": 6952 }, { "epoch": 0.5701460415065335, "grad_norm": 0.3909514546394348, "learning_rate": 4.257820920666179e-05, "loss": 2.7477, "step": 6954 }, { "epoch": 0.5703100179349219, "grad_norm": 0.3825553357601166, "learning_rate": 4.255139757603907e-05, "loss": 2.7187, "step": 6956 }, { "epoch": 0.5704739943633103, "grad_norm": 0.40624940395355225, "learning_rate": 4.252458813566042e-05, "loss": 2.7643, "step": 6958 }, { "epoch": 0.5706379707916986, "grad_norm": 0.4360128939151764, "learning_rate": 4.249778089340905e-05, "loss": 2.8152, "step": 6960 }, { "epoch": 0.5708019472200871, "grad_norm": 0.3969877362251282, "learning_rate": 4.2470975857167566e-05, "loss": 2.7356, "step": 6962 }, { "epoch": 0.5709659236484755, "grad_norm": 0.3684930205345154, "learning_rate": 4.244417303481795e-05, "loss": 2.7356, "step": 6964 }, { "epoch": 0.5711299000768639, "grad_norm": 0.3783034682273865, "learning_rate": 4.2417372434241484e-05, "loss": 2.7531, "step": 6966 }, { "epoch": 0.5712938765052523, "grad_norm": 0.37606626749038696, "learning_rate": 4.239057406331881e-05, "loss": 2.75, "step": 6968 }, { "epoch": 0.5714578529336408, "grad_norm": 0.41963014006614685, "learning_rate": 4.236377792992997e-05, "loss": 2.7568, "step": 6970 }, { "epoch": 0.5716218293620292, "grad_norm": 0.4141043722629547, "learning_rate": 4.233698404195429e-05, "loss": 2.7553, "step": 6972 }, { "epoch": 0.5717858057904176, "grad_norm": 0.4187765419483185, "learning_rate": 4.231019240727041e-05, "loss": 2.7638, "step": 6974 }, { "epoch": 0.5719497822188061, "grad_norm": 0.3854373097419739, "learning_rate": 4.2283403033756395e-05, "loss": 2.7458, "step": 6976 }, { "epoch": 0.5721137586471945, "grad_norm": 0.3839758038520813, "learning_rate": 4.225661592928958e-05, "loss": 2.718, "step": 6978 }, { "epoch": 0.5722777350755829, "grad_norm": 0.3789431154727936, "learning_rate": 4.222983110174664e-05, "loss": 2.7418, "step": 6980 }, { "epoch": 0.5724417115039713, "grad_norm": 0.3935195207595825, "learning_rate": 4.220304855900361e-05, "loss": 2.7965, "step": 6982 }, { "epoch": 0.5726056879323598, "grad_norm": 0.38839125633239746, "learning_rate": 4.217626830893583e-05, "loss": 2.8064, "step": 6984 }, { "epoch": 0.5727696643607482, "grad_norm": 0.3698122203350067, "learning_rate": 4.2149490359417946e-05, "loss": 2.7358, "step": 6986 }, { "epoch": 0.5729336407891366, "grad_norm": 0.370185911655426, "learning_rate": 4.2122714718323994e-05, "loss": 2.7785, "step": 6988 }, { "epoch": 0.573097617217525, "grad_norm": 0.38085514307022095, "learning_rate": 4.209594139352725e-05, "loss": 2.7234, "step": 6990 }, { "epoch": 0.5732615936459134, "grad_norm": 0.3918403089046478, "learning_rate": 4.206917039290035e-05, "loss": 2.7152, "step": 6992 }, { "epoch": 0.5734255700743018, "grad_norm": 0.4009610712528229, "learning_rate": 4.2042401724315266e-05, "loss": 2.7066, "step": 6994 }, { "epoch": 0.5735895465026902, "grad_norm": 0.3863372802734375, "learning_rate": 4.2015635395643256e-05, "loss": 2.7088, "step": 6996 }, { "epoch": 0.5737535229310786, "grad_norm": 0.4005741775035858, "learning_rate": 4.198887141475487e-05, "loss": 2.7371, "step": 6998 }, { "epoch": 0.5739174993594671, "grad_norm": 0.3993152678012848, "learning_rate": 4.1962109789520034e-05, "loss": 2.774, "step": 7000 }, { "epoch": 0.5740814757878555, "grad_norm": 0.4045857787132263, "learning_rate": 4.1935350527807915e-05, "loss": 2.752, "step": 7002 }, { "epoch": 0.5742454522162439, "grad_norm": 0.37730270624160767, "learning_rate": 4.190859363748701e-05, "loss": 2.7201, "step": 7004 }, { "epoch": 0.5744094286446323, "grad_norm": 0.38126033544540405, "learning_rate": 4.1881839126425145e-05, "loss": 2.7285, "step": 7006 }, { "epoch": 0.5745734050730208, "grad_norm": 0.42331182956695557, "learning_rate": 4.1855087002489385e-05, "loss": 2.7279, "step": 7008 }, { "epoch": 0.5747373815014092, "grad_norm": 0.4086098372936249, "learning_rate": 4.182833727354615e-05, "loss": 2.7349, "step": 7010 }, { "epoch": 0.5749013579297976, "grad_norm": 0.40086227655410767, "learning_rate": 4.180158994746114e-05, "loss": 2.7495, "step": 7012 }, { "epoch": 0.575065334358186, "grad_norm": 0.450406938791275, "learning_rate": 4.177484503209934e-05, "loss": 2.7815, "step": 7014 }, { "epoch": 0.5752293107865745, "grad_norm": 0.42005404829978943, "learning_rate": 4.1748102535325e-05, "loss": 2.7631, "step": 7016 }, { "epoch": 0.5753932872149629, "grad_norm": 0.38951101899147034, "learning_rate": 4.1721362465001734e-05, "loss": 2.7027, "step": 7018 }, { "epoch": 0.5755572636433512, "grad_norm": 0.4067320227622986, "learning_rate": 4.1694624828992377e-05, "loss": 2.7211, "step": 7020 }, { "epoch": 0.5757212400717396, "grad_norm": 0.4426279366016388, "learning_rate": 4.1667889635159044e-05, "loss": 2.8495, "step": 7022 }, { "epoch": 0.5758852165001281, "grad_norm": 0.4493538737297058, "learning_rate": 4.1641156891363206e-05, "loss": 2.7582, "step": 7024 }, { "epoch": 0.5760491929285165, "grad_norm": 0.46079501509666443, "learning_rate": 4.161442660546553e-05, "loss": 2.7274, "step": 7026 }, { "epoch": 0.5762131693569049, "grad_norm": 0.49728041887283325, "learning_rate": 4.158769878532599e-05, "loss": 2.7489, "step": 7028 }, { "epoch": 0.5763771457852933, "grad_norm": 0.4794892370700836, "learning_rate": 4.156097343880386e-05, "loss": 2.7606, "step": 7030 }, { "epoch": 0.5765411222136818, "grad_norm": 0.4507454037666321, "learning_rate": 4.153425057375766e-05, "loss": 2.7434, "step": 7032 }, { "epoch": 0.5767050986420702, "grad_norm": 0.4382268190383911, "learning_rate": 4.150753019804515e-05, "loss": 2.7202, "step": 7034 }, { "epoch": 0.5768690750704586, "grad_norm": 0.43126344680786133, "learning_rate": 4.148081231952346e-05, "loss": 2.8246, "step": 7036 }, { "epoch": 0.5770330514988471, "grad_norm": 0.4064197242259979, "learning_rate": 4.145409694604887e-05, "loss": 2.7816, "step": 7038 }, { "epoch": 0.5771970279272355, "grad_norm": 0.43481525778770447, "learning_rate": 4.142738408547696e-05, "loss": 2.7149, "step": 7040 }, { "epoch": 0.5773610043556239, "grad_norm": 0.4244084656238556, "learning_rate": 4.1400673745662644e-05, "loss": 2.7205, "step": 7042 }, { "epoch": 0.5775249807840123, "grad_norm": 0.42674392461776733, "learning_rate": 4.137396593445999e-05, "loss": 2.751, "step": 7044 }, { "epoch": 0.5776889572124008, "grad_norm": 0.40948551893234253, "learning_rate": 4.1347260659722365e-05, "loss": 2.7478, "step": 7046 }, { "epoch": 0.5778529336407892, "grad_norm": 0.3910583257675171, "learning_rate": 4.132055792930242e-05, "loss": 2.7325, "step": 7048 }, { "epoch": 0.5780169100691775, "grad_norm": 0.397865355014801, "learning_rate": 4.129385775105201e-05, "loss": 2.7733, "step": 7050 }, { "epoch": 0.5781808864975659, "grad_norm": 0.4031313359737396, "learning_rate": 4.126716013282226e-05, "loss": 2.6976, "step": 7052 }, { "epoch": 0.5783448629259544, "grad_norm": 0.41783228516578674, "learning_rate": 4.124046508246356e-05, "loss": 2.7627, "step": 7054 }, { "epoch": 0.5785088393543428, "grad_norm": 0.4214140474796295, "learning_rate": 4.121377260782551e-05, "loss": 2.7367, "step": 7056 }, { "epoch": 0.5786728157827312, "grad_norm": 0.40227892994880676, "learning_rate": 4.118708271675695e-05, "loss": 2.6962, "step": 7058 }, { "epoch": 0.5788367922111196, "grad_norm": 0.4140949249267578, "learning_rate": 4.1160395417106026e-05, "loss": 2.7505, "step": 7060 }, { "epoch": 0.5790007686395081, "grad_norm": 0.426513671875, "learning_rate": 4.113371071672005e-05, "loss": 2.7943, "step": 7062 }, { "epoch": 0.5791647450678965, "grad_norm": 0.43574535846710205, "learning_rate": 4.110702862344557e-05, "loss": 2.7699, "step": 7064 }, { "epoch": 0.5793287214962849, "grad_norm": 0.3993785083293915, "learning_rate": 4.108034914512845e-05, "loss": 2.731, "step": 7066 }, { "epoch": 0.5794926979246733, "grad_norm": 0.4101898670196533, "learning_rate": 4.105367228961369e-05, "loss": 2.776, "step": 7068 }, { "epoch": 0.5796566743530618, "grad_norm": 0.41295358538627625, "learning_rate": 4.102699806474555e-05, "loss": 2.7528, "step": 7070 }, { "epoch": 0.5798206507814502, "grad_norm": 0.39692431688308716, "learning_rate": 4.100032647836756e-05, "loss": 2.7873, "step": 7072 }, { "epoch": 0.5799846272098386, "grad_norm": 0.4573379456996918, "learning_rate": 4.097365753832241e-05, "loss": 2.7406, "step": 7074 }, { "epoch": 0.580148603638227, "grad_norm": 0.45833566784858704, "learning_rate": 4.094699125245204e-05, "loss": 2.7557, "step": 7076 }, { "epoch": 0.5803125800666155, "grad_norm": 0.4373979866504669, "learning_rate": 4.092032762859762e-05, "loss": 2.7181, "step": 7078 }, { "epoch": 0.5804765564950038, "grad_norm": 0.3798002004623413, "learning_rate": 4.089366667459952e-05, "loss": 2.7446, "step": 7080 }, { "epoch": 0.5806405329233922, "grad_norm": 0.3701511323451996, "learning_rate": 4.086700839829731e-05, "loss": 2.7273, "step": 7082 }, { "epoch": 0.5808045093517806, "grad_norm": 0.40236371755599976, "learning_rate": 4.084035280752983e-05, "loss": 2.7816, "step": 7084 }, { "epoch": 0.5809684857801691, "grad_norm": 0.4041323959827423, "learning_rate": 4.0813699910135075e-05, "loss": 2.6938, "step": 7086 }, { "epoch": 0.5811324622085575, "grad_norm": 0.4038769602775574, "learning_rate": 4.0787049713950256e-05, "loss": 2.7723, "step": 7088 }, { "epoch": 0.5812964386369459, "grad_norm": 0.403374582529068, "learning_rate": 4.076040222681183e-05, "loss": 2.7374, "step": 7090 }, { "epoch": 0.5814604150653344, "grad_norm": 0.4049570858478546, "learning_rate": 4.073375745655541e-05, "loss": 2.7626, "step": 7092 }, { "epoch": 0.5816243914937228, "grad_norm": 0.41250374913215637, "learning_rate": 4.070711541101581e-05, "loss": 2.8086, "step": 7094 }, { "epoch": 0.5817883679221112, "grad_norm": 0.3828825056552887, "learning_rate": 4.06804760980271e-05, "loss": 2.7755, "step": 7096 }, { "epoch": 0.5819523443504996, "grad_norm": 0.3960285186767578, "learning_rate": 4.0653839525422486e-05, "loss": 2.7239, "step": 7098 }, { "epoch": 0.5821163207788881, "grad_norm": 0.3921143114566803, "learning_rate": 4.062720570103439e-05, "loss": 2.7552, "step": 7100 }, { "epoch": 0.5822802972072765, "grad_norm": 0.4139832556247711, "learning_rate": 4.0600574632694426e-05, "loss": 2.7164, "step": 7102 }, { "epoch": 0.5824442736356649, "grad_norm": 0.4004044830799103, "learning_rate": 4.0573946328233406e-05, "loss": 2.7205, "step": 7104 }, { "epoch": 0.5826082500640533, "grad_norm": 0.4060121774673462, "learning_rate": 4.054732079548129e-05, "loss": 2.7587, "step": 7106 }, { "epoch": 0.5827722264924418, "grad_norm": 0.40428370237350464, "learning_rate": 4.052069804226729e-05, "loss": 2.7767, "step": 7108 }, { "epoch": 0.5829362029208301, "grad_norm": 0.3867088854312897, "learning_rate": 4.049407807641975e-05, "loss": 2.7074, "step": 7110 }, { "epoch": 0.5831001793492185, "grad_norm": 0.4056602120399475, "learning_rate": 4.0467460905766194e-05, "loss": 2.7435, "step": 7112 }, { "epoch": 0.5832641557776069, "grad_norm": 0.39436739683151245, "learning_rate": 4.044084653813337e-05, "loss": 2.7493, "step": 7114 }, { "epoch": 0.5834281322059954, "grad_norm": 0.40216735005378723, "learning_rate": 4.041423498134715e-05, "loss": 2.7319, "step": 7116 }, { "epoch": 0.5835921086343838, "grad_norm": 0.3866954445838928, "learning_rate": 4.038762624323259e-05, "loss": 2.718, "step": 7118 }, { "epoch": 0.5837560850627722, "grad_norm": 0.3950190544128418, "learning_rate": 4.0361020331613944e-05, "loss": 2.8338, "step": 7120 }, { "epoch": 0.5839200614911606, "grad_norm": 0.392497181892395, "learning_rate": 4.033441725431462e-05, "loss": 2.7203, "step": 7122 }, { "epoch": 0.5840840379195491, "grad_norm": 0.4107820391654968, "learning_rate": 4.030781701915718e-05, "loss": 2.7361, "step": 7124 }, { "epoch": 0.5842480143479375, "grad_norm": 0.4411157965660095, "learning_rate": 4.028121963396337e-05, "loss": 2.718, "step": 7126 }, { "epoch": 0.5844119907763259, "grad_norm": 0.4278470277786255, "learning_rate": 4.0254625106554076e-05, "loss": 2.6954, "step": 7128 }, { "epoch": 0.5845759672047143, "grad_norm": 0.44073304533958435, "learning_rate": 4.0228033444749345e-05, "loss": 2.6976, "step": 7130 }, { "epoch": 0.5847399436331028, "grad_norm": 0.41985079646110535, "learning_rate": 4.020144465636844e-05, "loss": 2.728, "step": 7132 }, { "epoch": 0.5849039200614912, "grad_norm": 0.4166056215763092, "learning_rate": 4.017485874922969e-05, "loss": 2.7258, "step": 7134 }, { "epoch": 0.5850678964898796, "grad_norm": 0.45667922496795654, "learning_rate": 4.014827573115062e-05, "loss": 2.7691, "step": 7136 }, { "epoch": 0.585231872918268, "grad_norm": 0.4428565204143524, "learning_rate": 4.012169560994793e-05, "loss": 2.7454, "step": 7138 }, { "epoch": 0.5853958493466564, "grad_norm": 0.4104422628879547, "learning_rate": 4.009511839343742e-05, "loss": 2.7854, "step": 7140 }, { "epoch": 0.5855598257750448, "grad_norm": 0.3911503255367279, "learning_rate": 4.0068544089434067e-05, "loss": 2.7899, "step": 7142 }, { "epoch": 0.5857238022034332, "grad_norm": 0.4035918414592743, "learning_rate": 4.004197270575198e-05, "loss": 2.7087, "step": 7144 }, { "epoch": 0.5858877786318216, "grad_norm": 0.40716925263404846, "learning_rate": 4.001540425020441e-05, "loss": 2.7271, "step": 7146 }, { "epoch": 0.5860517550602101, "grad_norm": 0.4005942940711975, "learning_rate": 3.998883873060375e-05, "loss": 2.7669, "step": 7148 }, { "epoch": 0.5862157314885985, "grad_norm": 0.37836208939552307, "learning_rate": 3.996227615476154e-05, "loss": 2.754, "step": 7150 }, { "epoch": 0.5863797079169869, "grad_norm": 0.41903597116470337, "learning_rate": 3.993571653048843e-05, "loss": 2.6871, "step": 7152 }, { "epoch": 0.5865436843453754, "grad_norm": 0.3899541199207306, "learning_rate": 3.9909159865594206e-05, "loss": 2.7085, "step": 7154 }, { "epoch": 0.5867076607737638, "grad_norm": 0.4074578881263733, "learning_rate": 3.988260616788781e-05, "loss": 2.7907, "step": 7156 }, { "epoch": 0.5868716372021522, "grad_norm": 0.4121086299419403, "learning_rate": 3.9856055445177305e-05, "loss": 2.7571, "step": 7158 }, { "epoch": 0.5870356136305406, "grad_norm": 0.39414846897125244, "learning_rate": 3.982950770526982e-05, "loss": 2.7257, "step": 7160 }, { "epoch": 0.5871995900589291, "grad_norm": 0.3592352569103241, "learning_rate": 3.980296295597172e-05, "loss": 2.691, "step": 7162 }, { "epoch": 0.5873635664873175, "grad_norm": 0.4083999693393707, "learning_rate": 3.9776421205088386e-05, "loss": 2.7582, "step": 7164 }, { "epoch": 0.5875275429157059, "grad_norm": 0.410372257232666, "learning_rate": 3.9749882460424357e-05, "loss": 2.774, "step": 7166 }, { "epoch": 0.5876915193440942, "grad_norm": 0.41757383942604065, "learning_rate": 3.9723346729783305e-05, "loss": 2.7065, "step": 7168 }, { "epoch": 0.5878554957724828, "grad_norm": 0.44156357645988464, "learning_rate": 3.9696814020967996e-05, "loss": 2.6664, "step": 7170 }, { "epoch": 0.5880194722008711, "grad_norm": 0.43870604038238525, "learning_rate": 3.967028434178028e-05, "loss": 2.7485, "step": 7172 }, { "epoch": 0.5881834486292595, "grad_norm": 0.3917291760444641, "learning_rate": 3.964375770002121e-05, "loss": 2.7919, "step": 7174 }, { "epoch": 0.5883474250576479, "grad_norm": 0.41526177525520325, "learning_rate": 3.961723410349083e-05, "loss": 2.7659, "step": 7176 }, { "epoch": 0.5885114014860364, "grad_norm": 0.39451149106025696, "learning_rate": 3.959071355998834e-05, "loss": 2.7591, "step": 7178 }, { "epoch": 0.5886753779144248, "grad_norm": 0.3906865417957306, "learning_rate": 3.9564196077312084e-05, "loss": 2.7009, "step": 7180 }, { "epoch": 0.5888393543428132, "grad_norm": 0.4263087213039398, "learning_rate": 3.953768166325944e-05, "loss": 2.7435, "step": 7182 }, { "epoch": 0.5890033307712016, "grad_norm": 0.41483640670776367, "learning_rate": 3.951117032562689e-05, "loss": 2.7062, "step": 7184 }, { "epoch": 0.5891673071995901, "grad_norm": 0.3860895335674286, "learning_rate": 3.948466207221007e-05, "loss": 2.7711, "step": 7186 }, { "epoch": 0.5893312836279785, "grad_norm": 0.4068519175052643, "learning_rate": 3.945815691080365e-05, "loss": 2.703, "step": 7188 }, { "epoch": 0.5894952600563669, "grad_norm": 0.4312509000301361, "learning_rate": 3.9431654849201404e-05, "loss": 2.7669, "step": 7190 }, { "epoch": 0.5896592364847553, "grad_norm": 0.3803481459617615, "learning_rate": 3.940515589519622e-05, "loss": 2.7223, "step": 7192 }, { "epoch": 0.5898232129131438, "grad_norm": 0.37895467877388, "learning_rate": 3.9378660056580046e-05, "loss": 2.7479, "step": 7194 }, { "epoch": 0.5899871893415322, "grad_norm": 0.4233595132827759, "learning_rate": 3.9352167341143884e-05, "loss": 2.732, "step": 7196 }, { "epoch": 0.5901511657699205, "grad_norm": 0.40764448046684265, "learning_rate": 3.932567775667792e-05, "loss": 2.7261, "step": 7198 }, { "epoch": 0.5903151421983089, "grad_norm": 0.4058440327644348, "learning_rate": 3.9299191310971314e-05, "loss": 2.7445, "step": 7200 }, { "epoch": 0.5904791186266974, "grad_norm": 0.38775965571403503, "learning_rate": 3.927270801181234e-05, "loss": 2.768, "step": 7202 }, { "epoch": 0.5906430950550858, "grad_norm": 0.36376839876174927, "learning_rate": 3.924622786698837e-05, "loss": 2.7043, "step": 7204 }, { "epoch": 0.5908070714834742, "grad_norm": 0.39008110761642456, "learning_rate": 3.921975088428582e-05, "loss": 2.7474, "step": 7206 }, { "epoch": 0.5909710479118627, "grad_norm": 0.3973616361618042, "learning_rate": 3.9193277071490164e-05, "loss": 2.664, "step": 7208 }, { "epoch": 0.5911350243402511, "grad_norm": 0.3894846439361572, "learning_rate": 3.9166806436386e-05, "loss": 2.7569, "step": 7210 }, { "epoch": 0.5912990007686395, "grad_norm": 0.3805749714374542, "learning_rate": 3.9140338986756954e-05, "loss": 2.7333, "step": 7212 }, { "epoch": 0.5914629771970279, "grad_norm": 0.4085012674331665, "learning_rate": 3.911387473038568e-05, "loss": 2.7184, "step": 7214 }, { "epoch": 0.5916269536254164, "grad_norm": 0.3935090899467468, "learning_rate": 3.908741367505397e-05, "loss": 2.7763, "step": 7216 }, { "epoch": 0.5917909300538048, "grad_norm": 0.3762976825237274, "learning_rate": 3.906095582854262e-05, "loss": 2.7076, "step": 7218 }, { "epoch": 0.5919549064821932, "grad_norm": 0.39394861459732056, "learning_rate": 3.9034501198631465e-05, "loss": 2.7718, "step": 7220 }, { "epoch": 0.5921188829105816, "grad_norm": 0.39268386363983154, "learning_rate": 3.900804979309949e-05, "loss": 2.726, "step": 7222 }, { "epoch": 0.5922828593389701, "grad_norm": 0.40367162227630615, "learning_rate": 3.898160161972463e-05, "loss": 2.7105, "step": 7224 }, { "epoch": 0.5924468357673585, "grad_norm": 0.36791446805000305, "learning_rate": 3.89551566862839e-05, "loss": 2.6979, "step": 7226 }, { "epoch": 0.5926108121957468, "grad_norm": 0.37786632776260376, "learning_rate": 3.89287150005534e-05, "loss": 2.7642, "step": 7228 }, { "epoch": 0.5927747886241352, "grad_norm": 0.36827540397644043, "learning_rate": 3.890227657030823e-05, "loss": 2.7612, "step": 7230 }, { "epoch": 0.5929387650525237, "grad_norm": 0.38697636127471924, "learning_rate": 3.8875841403322534e-05, "loss": 2.7035, "step": 7232 }, { "epoch": 0.5931027414809121, "grad_norm": 0.3698570132255554, "learning_rate": 3.8849409507369536e-05, "loss": 2.7636, "step": 7234 }, { "epoch": 0.5932667179093005, "grad_norm": 0.3766981363296509, "learning_rate": 3.882298089022147e-05, "loss": 2.7442, "step": 7236 }, { "epoch": 0.5934306943376889, "grad_norm": 0.3901519477367401, "learning_rate": 3.8796555559649594e-05, "loss": 2.765, "step": 7238 }, { "epoch": 0.5935946707660774, "grad_norm": 0.38234102725982666, "learning_rate": 3.8770133523424234e-05, "loss": 2.6888, "step": 7240 }, { "epoch": 0.5937586471944658, "grad_norm": 0.3951614797115326, "learning_rate": 3.874371478931471e-05, "loss": 2.7455, "step": 7242 }, { "epoch": 0.5939226236228542, "grad_norm": 0.393010675907135, "learning_rate": 3.8717299365089374e-05, "loss": 2.7469, "step": 7244 }, { "epoch": 0.5940866000512426, "grad_norm": 0.4106244146823883, "learning_rate": 3.869088725851566e-05, "loss": 2.7471, "step": 7246 }, { "epoch": 0.5942505764796311, "grad_norm": 0.40092745423316956, "learning_rate": 3.866447847735997e-05, "loss": 2.7754, "step": 7248 }, { "epoch": 0.5944145529080195, "grad_norm": 0.3768394887447357, "learning_rate": 3.863807302938771e-05, "loss": 2.7274, "step": 7250 }, { "epoch": 0.5945785293364079, "grad_norm": 0.4089185893535614, "learning_rate": 3.861167092236339e-05, "loss": 2.7218, "step": 7252 }, { "epoch": 0.5947425057647963, "grad_norm": 0.4022778868675232, "learning_rate": 3.8585272164050476e-05, "loss": 2.7384, "step": 7254 }, { "epoch": 0.5949064821931848, "grad_norm": 0.4059952199459076, "learning_rate": 3.8558876762211436e-05, "loss": 2.7447, "step": 7256 }, { "epoch": 0.5950704586215732, "grad_norm": 0.39446863532066345, "learning_rate": 3.8532484724607786e-05, "loss": 2.7583, "step": 7258 }, { "epoch": 0.5952344350499615, "grad_norm": 0.4041154682636261, "learning_rate": 3.850609605900007e-05, "loss": 2.7152, "step": 7260 }, { "epoch": 0.59539841147835, "grad_norm": 0.39531728625297546, "learning_rate": 3.847971077314777e-05, "loss": 2.7415, "step": 7262 }, { "epoch": 0.5955623879067384, "grad_norm": 0.42486509680747986, "learning_rate": 3.845332887480946e-05, "loss": 2.7512, "step": 7264 }, { "epoch": 0.5957263643351268, "grad_norm": 0.39751359820365906, "learning_rate": 3.842695037174264e-05, "loss": 2.7359, "step": 7266 }, { "epoch": 0.5958903407635152, "grad_norm": 0.3758830428123474, "learning_rate": 3.8400575271703854e-05, "loss": 2.7356, "step": 7268 }, { "epoch": 0.5960543171919037, "grad_norm": 0.3784623146057129, "learning_rate": 3.837420358244866e-05, "loss": 2.7445, "step": 7270 }, { "epoch": 0.5962182936202921, "grad_norm": 0.38142549991607666, "learning_rate": 3.834783531173157e-05, "loss": 2.7066, "step": 7272 }, { "epoch": 0.5963822700486805, "grad_norm": 0.39958712458610535, "learning_rate": 3.8321470467306105e-05, "loss": 2.7698, "step": 7274 }, { "epoch": 0.5965462464770689, "grad_norm": 0.3814118802547455, "learning_rate": 3.829510905692482e-05, "loss": 2.7364, "step": 7276 }, { "epoch": 0.5967102229054574, "grad_norm": 0.386503130197525, "learning_rate": 3.8268751088339195e-05, "loss": 2.7719, "step": 7278 }, { "epoch": 0.5968741993338458, "grad_norm": 0.3756306767463684, "learning_rate": 3.8242396569299735e-05, "loss": 2.7316, "step": 7280 }, { "epoch": 0.5970381757622342, "grad_norm": 0.37826624512672424, "learning_rate": 3.821604550755593e-05, "loss": 2.7203, "step": 7282 }, { "epoch": 0.5972021521906226, "grad_norm": 0.3793233633041382, "learning_rate": 3.818969791085624e-05, "loss": 2.7435, "step": 7284 }, { "epoch": 0.5973661286190111, "grad_norm": 0.3833853304386139, "learning_rate": 3.816335378694812e-05, "loss": 2.7679, "step": 7286 }, { "epoch": 0.5975301050473995, "grad_norm": 0.35013169050216675, "learning_rate": 3.813701314357801e-05, "loss": 2.693, "step": 7288 }, { "epoch": 0.5976940814757878, "grad_norm": 0.38554146885871887, "learning_rate": 3.81106759884913e-05, "loss": 2.7149, "step": 7290 }, { "epoch": 0.5978580579041762, "grad_norm": 0.39514926075935364, "learning_rate": 3.808434232943234e-05, "loss": 2.7638, "step": 7292 }, { "epoch": 0.5980220343325647, "grad_norm": 0.4564327299594879, "learning_rate": 3.805801217414454e-05, "loss": 2.7505, "step": 7294 }, { "epoch": 0.5981860107609531, "grad_norm": 0.41196030378341675, "learning_rate": 3.8031685530370184e-05, "loss": 2.7307, "step": 7296 }, { "epoch": 0.5983499871893415, "grad_norm": 0.3923788368701935, "learning_rate": 3.8005362405850554e-05, "loss": 2.7429, "step": 7298 }, { "epoch": 0.5985139636177299, "grad_norm": 0.40541306138038635, "learning_rate": 3.7979042808325925e-05, "loss": 2.7411, "step": 7300 }, { "epoch": 0.5986779400461184, "grad_norm": 0.37198057770729065, "learning_rate": 3.795272674553551e-05, "loss": 2.7234, "step": 7302 }, { "epoch": 0.5988419164745068, "grad_norm": 0.4197216033935547, "learning_rate": 3.792641422521746e-05, "loss": 2.7148, "step": 7304 }, { "epoch": 0.5990058929028952, "grad_norm": 0.4193935692310333, "learning_rate": 3.7900105255108956e-05, "loss": 2.7489, "step": 7306 }, { "epoch": 0.5991698693312836, "grad_norm": 0.434123158454895, "learning_rate": 3.787379984294605e-05, "loss": 2.7359, "step": 7308 }, { "epoch": 0.5993338457596721, "grad_norm": 0.42082515358924866, "learning_rate": 3.784749799646377e-05, "loss": 2.7424, "step": 7310 }, { "epoch": 0.5994978221880605, "grad_norm": 0.4002670347690582, "learning_rate": 3.782119972339616e-05, "loss": 2.7249, "step": 7312 }, { "epoch": 0.5996617986164489, "grad_norm": 0.3787383735179901, "learning_rate": 3.779490503147615e-05, "loss": 2.7341, "step": 7314 }, { "epoch": 0.5998257750448373, "grad_norm": 0.3705497682094574, "learning_rate": 3.776861392843561e-05, "loss": 2.7528, "step": 7316 }, { "epoch": 0.5999897514732258, "grad_norm": 0.38035187125205994, "learning_rate": 3.774232642200541e-05, "loss": 2.7124, "step": 7318 }, { "epoch": 0.6001537279016141, "grad_norm": 0.41400671005249023, "learning_rate": 3.771604251991532e-05, "loss": 2.7601, "step": 7320 }, { "epoch": 0.6003177043300025, "grad_norm": 0.41931360960006714, "learning_rate": 3.768976222989402e-05, "loss": 2.7184, "step": 7322 }, { "epoch": 0.600481680758391, "grad_norm": 0.41194209456443787, "learning_rate": 3.766348555966923e-05, "loss": 2.6892, "step": 7324 }, { "epoch": 0.6006456571867794, "grad_norm": 0.4093153178691864, "learning_rate": 3.763721251696751e-05, "loss": 2.7387, "step": 7326 }, { "epoch": 0.6008096336151678, "grad_norm": 0.44898512959480286, "learning_rate": 3.761094310951439e-05, "loss": 2.7957, "step": 7328 }, { "epoch": 0.6009736100435562, "grad_norm": 0.4230307340621948, "learning_rate": 3.758467734503433e-05, "loss": 2.7205, "step": 7330 }, { "epoch": 0.6011375864719447, "grad_norm": 0.42397361993789673, "learning_rate": 3.755841523125072e-05, "loss": 2.6792, "step": 7332 }, { "epoch": 0.6013015629003331, "grad_norm": 0.38022592663764954, "learning_rate": 3.753215677588584e-05, "loss": 2.7582, "step": 7334 }, { "epoch": 0.6014655393287215, "grad_norm": 0.4053349494934082, "learning_rate": 3.7505901986660974e-05, "loss": 2.6986, "step": 7336 }, { "epoch": 0.6016295157571099, "grad_norm": 0.39706191420555115, "learning_rate": 3.747965087129627e-05, "loss": 2.6879, "step": 7338 }, { "epoch": 0.6017934921854984, "grad_norm": 0.39096325635910034, "learning_rate": 3.745340343751077e-05, "loss": 2.758, "step": 7340 }, { "epoch": 0.6019574686138868, "grad_norm": 0.3869808614253998, "learning_rate": 3.742715969302252e-05, "loss": 2.7402, "step": 7342 }, { "epoch": 0.6021214450422752, "grad_norm": 0.4089089035987854, "learning_rate": 3.74009196455484e-05, "loss": 2.6903, "step": 7344 }, { "epoch": 0.6022854214706636, "grad_norm": 0.41569820046424866, "learning_rate": 3.7374683302804236e-05, "loss": 2.7006, "step": 7346 }, { "epoch": 0.602449397899052, "grad_norm": 0.39513155817985535, "learning_rate": 3.734845067250479e-05, "loss": 2.7673, "step": 7348 }, { "epoch": 0.6026133743274404, "grad_norm": 0.41469231247901917, "learning_rate": 3.732222176236369e-05, "loss": 2.7501, "step": 7350 }, { "epoch": 0.6027773507558288, "grad_norm": 0.4159885346889496, "learning_rate": 3.729599658009346e-05, "loss": 2.748, "step": 7352 }, { "epoch": 0.6029413271842172, "grad_norm": 0.4074746370315552, "learning_rate": 3.7269775133405594e-05, "loss": 2.745, "step": 7354 }, { "epoch": 0.6031053036126057, "grad_norm": 0.40688809752464294, "learning_rate": 3.7243557430010424e-05, "loss": 2.7292, "step": 7356 }, { "epoch": 0.6032692800409941, "grad_norm": 0.38384461402893066, "learning_rate": 3.7217343477617184e-05, "loss": 2.736, "step": 7358 }, { "epoch": 0.6034332564693825, "grad_norm": 0.3964248299598694, "learning_rate": 3.719113328393408e-05, "loss": 2.698, "step": 7360 }, { "epoch": 0.6035972328977709, "grad_norm": 0.43690356612205505, "learning_rate": 3.7164926856668115e-05, "loss": 2.7602, "step": 7362 }, { "epoch": 0.6037612093261594, "grad_norm": 0.40876147150993347, "learning_rate": 3.7138724203525215e-05, "loss": 2.7014, "step": 7364 }, { "epoch": 0.6039251857545478, "grad_norm": 0.42225024104118347, "learning_rate": 3.7112525332210255e-05, "loss": 2.7621, "step": 7366 }, { "epoch": 0.6040891621829362, "grad_norm": 0.3870079517364502, "learning_rate": 3.708633025042694e-05, "loss": 2.7207, "step": 7368 }, { "epoch": 0.6042531386113246, "grad_norm": 0.3651062548160553, "learning_rate": 3.7060138965877835e-05, "loss": 2.704, "step": 7370 }, { "epoch": 0.6044171150397131, "grad_norm": 0.3923655152320862, "learning_rate": 3.703395148626447e-05, "loss": 2.7833, "step": 7372 }, { "epoch": 0.6045810914681015, "grad_norm": 0.40090101957321167, "learning_rate": 3.7007767819287195e-05, "loss": 2.747, "step": 7374 }, { "epoch": 0.6047450678964899, "grad_norm": 0.3791881203651428, "learning_rate": 3.698158797264524e-05, "loss": 2.7639, "step": 7376 }, { "epoch": 0.6049090443248784, "grad_norm": 0.4073001444339752, "learning_rate": 3.6955411954036755e-05, "loss": 2.707, "step": 7378 }, { "epoch": 0.6050730207532667, "grad_norm": 0.3848922550678253, "learning_rate": 3.692923977115872e-05, "loss": 2.7157, "step": 7380 }, { "epoch": 0.6052369971816551, "grad_norm": 0.3976440727710724, "learning_rate": 3.6903071431706994e-05, "loss": 2.7507, "step": 7382 }, { "epoch": 0.6054009736100435, "grad_norm": 0.431142121553421, "learning_rate": 3.687690694337634e-05, "loss": 2.7576, "step": 7384 }, { "epoch": 0.605564950038432, "grad_norm": 0.40304774045944214, "learning_rate": 3.685074631386036e-05, "loss": 2.7305, "step": 7386 }, { "epoch": 0.6057289264668204, "grad_norm": 0.3595426678657532, "learning_rate": 3.6824589550851495e-05, "loss": 2.7312, "step": 7388 }, { "epoch": 0.6058929028952088, "grad_norm": 0.3594333827495575, "learning_rate": 3.679843666204113e-05, "loss": 2.7613, "step": 7390 }, { "epoch": 0.6060568793235972, "grad_norm": 0.3855232000350952, "learning_rate": 3.677228765511943e-05, "loss": 2.7281, "step": 7392 }, { "epoch": 0.6062208557519857, "grad_norm": 0.41874638199806213, "learning_rate": 3.6746142537775443e-05, "loss": 2.7317, "step": 7394 }, { "epoch": 0.6063848321803741, "grad_norm": 0.3978760838508606, "learning_rate": 3.672000131769709e-05, "loss": 2.7232, "step": 7396 }, { "epoch": 0.6065488086087625, "grad_norm": 0.38561245799064636, "learning_rate": 3.6693864002571144e-05, "loss": 2.746, "step": 7398 }, { "epoch": 0.6067127850371509, "grad_norm": 0.3927624821662903, "learning_rate": 3.66677306000832e-05, "loss": 2.7364, "step": 7400 }, { "epoch": 0.6068767614655394, "grad_norm": 0.3884304463863373, "learning_rate": 3.664160111791775e-05, "loss": 2.7298, "step": 7402 }, { "epoch": 0.6070407378939278, "grad_norm": 0.4252575933933258, "learning_rate": 3.6615475563758086e-05, "loss": 2.707, "step": 7404 }, { "epoch": 0.6072047143223162, "grad_norm": 0.40555331110954285, "learning_rate": 3.6589353945286354e-05, "loss": 2.6956, "step": 7406 }, { "epoch": 0.6073686907507045, "grad_norm": 0.39419540762901306, "learning_rate": 3.6563236270183596e-05, "loss": 2.7786, "step": 7408 }, { "epoch": 0.607532667179093, "grad_norm": 0.3671490550041199, "learning_rate": 3.653712254612962e-05, "loss": 2.7485, "step": 7410 }, { "epoch": 0.6076966436074814, "grad_norm": 0.3971266448497772, "learning_rate": 3.6511012780803105e-05, "loss": 2.7043, "step": 7412 }, { "epoch": 0.6078606200358698, "grad_norm": 0.38373613357543945, "learning_rate": 3.648490698188159e-05, "loss": 2.7835, "step": 7414 }, { "epoch": 0.6080245964642582, "grad_norm": 0.3933662474155426, "learning_rate": 3.645880515704141e-05, "loss": 2.6655, "step": 7416 }, { "epoch": 0.6081885728926467, "grad_norm": 0.3658972978591919, "learning_rate": 3.6432707313957736e-05, "loss": 2.7397, "step": 7418 }, { "epoch": 0.6083525493210351, "grad_norm": 0.37315210700035095, "learning_rate": 3.640661346030459e-05, "loss": 2.7642, "step": 7420 }, { "epoch": 0.6085165257494235, "grad_norm": 0.4116625487804413, "learning_rate": 3.638052360375481e-05, "loss": 2.6983, "step": 7422 }, { "epoch": 0.6086805021778119, "grad_norm": 0.4126829206943512, "learning_rate": 3.635443775198004e-05, "loss": 2.7407, "step": 7424 }, { "epoch": 0.6088444786062004, "grad_norm": 0.40081050992012024, "learning_rate": 3.6328355912650794e-05, "loss": 2.721, "step": 7426 }, { "epoch": 0.6090084550345888, "grad_norm": 0.39633798599243164, "learning_rate": 3.630227809343634e-05, "loss": 2.7672, "step": 7428 }, { "epoch": 0.6091724314629772, "grad_norm": 0.36122927069664, "learning_rate": 3.627620430200481e-05, "loss": 2.7668, "step": 7430 }, { "epoch": 0.6093364078913656, "grad_norm": 0.38888874650001526, "learning_rate": 3.625013454602316e-05, "loss": 2.7688, "step": 7432 }, { "epoch": 0.6095003843197541, "grad_norm": 0.3885965645313263, "learning_rate": 3.6224068833157135e-05, "loss": 2.7836, "step": 7434 }, { "epoch": 0.6096643607481425, "grad_norm": 0.41673019528388977, "learning_rate": 3.619800717107126e-05, "loss": 2.7014, "step": 7436 }, { "epoch": 0.6098283371765308, "grad_norm": 0.4096967875957489, "learning_rate": 3.6171949567428956e-05, "loss": 2.7139, "step": 7438 }, { "epoch": 0.6099923136049193, "grad_norm": 0.3758758306503296, "learning_rate": 3.614589602989238e-05, "loss": 2.6619, "step": 7440 }, { "epoch": 0.6101562900333077, "grad_norm": 0.4018336832523346, "learning_rate": 3.6119846566122505e-05, "loss": 2.6948, "step": 7442 }, { "epoch": 0.6103202664616961, "grad_norm": 0.402338445186615, "learning_rate": 3.609380118377913e-05, "loss": 2.768, "step": 7444 }, { "epoch": 0.6104842428900845, "grad_norm": 0.3775213360786438, "learning_rate": 3.606775989052083e-05, "loss": 2.7255, "step": 7446 }, { "epoch": 0.610648219318473, "grad_norm": 0.4092825651168823, "learning_rate": 3.6041722694004964e-05, "loss": 2.7224, "step": 7448 }, { "epoch": 0.6108121957468614, "grad_norm": 0.3705921769142151, "learning_rate": 3.6015689601887746e-05, "loss": 2.667, "step": 7450 }, { "epoch": 0.6109761721752498, "grad_norm": 0.39161479473114014, "learning_rate": 3.598966062182414e-05, "loss": 2.7119, "step": 7452 }, { "epoch": 0.6111401486036382, "grad_norm": 0.3508928120136261, "learning_rate": 3.596363576146787e-05, "loss": 2.7114, "step": 7454 }, { "epoch": 0.6113041250320267, "grad_norm": 0.3898743987083435, "learning_rate": 3.5937615028471536e-05, "loss": 2.7308, "step": 7456 }, { "epoch": 0.6114681014604151, "grad_norm": 0.40665403008461, "learning_rate": 3.591159843048645e-05, "loss": 2.6795, "step": 7458 }, { "epoch": 0.6116320778888035, "grad_norm": 0.39401838183403015, "learning_rate": 3.5885585975162706e-05, "loss": 2.7645, "step": 7460 }, { "epoch": 0.6117960543171919, "grad_norm": 0.39833781123161316, "learning_rate": 3.585957767014925e-05, "loss": 2.7333, "step": 7462 }, { "epoch": 0.6119600307455804, "grad_norm": 0.3788768947124481, "learning_rate": 3.583357352309374e-05, "loss": 2.7235, "step": 7464 }, { "epoch": 0.6121240071739688, "grad_norm": 0.3793696165084839, "learning_rate": 3.580757354164264e-05, "loss": 2.6877, "step": 7466 }, { "epoch": 0.6122879836023571, "grad_norm": 0.4045318067073822, "learning_rate": 3.578157773344118e-05, "loss": 2.6522, "step": 7468 }, { "epoch": 0.6124519600307455, "grad_norm": 0.426973432302475, "learning_rate": 3.5755586106133366e-05, "loss": 2.6703, "step": 7470 }, { "epoch": 0.612615936459134, "grad_norm": 0.4132937788963318, "learning_rate": 3.572959866736196e-05, "loss": 2.746, "step": 7472 }, { "epoch": 0.6127799128875224, "grad_norm": 0.38171377778053284, "learning_rate": 3.570361542476855e-05, "loss": 2.7185, "step": 7474 }, { "epoch": 0.6129438893159108, "grad_norm": 0.40772563219070435, "learning_rate": 3.56776363859934e-05, "loss": 2.7168, "step": 7476 }, { "epoch": 0.6131078657442992, "grad_norm": 0.4192030131816864, "learning_rate": 3.56516615586756e-05, "loss": 2.7037, "step": 7478 }, { "epoch": 0.6132718421726877, "grad_norm": 0.3770076632499695, "learning_rate": 3.5625690950453006e-05, "loss": 2.8116, "step": 7480 }, { "epoch": 0.6134358186010761, "grad_norm": 0.39449840784072876, "learning_rate": 3.559972456896221e-05, "loss": 2.7704, "step": 7482 }, { "epoch": 0.6135997950294645, "grad_norm": 0.39838895201683044, "learning_rate": 3.557376242183853e-05, "loss": 2.7817, "step": 7484 }, { "epoch": 0.6137637714578529, "grad_norm": 0.3995193541049957, "learning_rate": 3.554780451671612e-05, "loss": 2.6982, "step": 7486 }, { "epoch": 0.6139277478862414, "grad_norm": 0.3879450857639313, "learning_rate": 3.552185086122782e-05, "loss": 2.7279, "step": 7488 }, { "epoch": 0.6140917243146298, "grad_norm": 0.39906418323516846, "learning_rate": 3.549590146300524e-05, "loss": 2.698, "step": 7490 }, { "epoch": 0.6142557007430182, "grad_norm": 0.40238451957702637, "learning_rate": 3.546995632967875e-05, "loss": 2.7944, "step": 7492 }, { "epoch": 0.6144196771714067, "grad_norm": 0.3660317063331604, "learning_rate": 3.544401546887745e-05, "loss": 2.7049, "step": 7494 }, { "epoch": 0.614583653599795, "grad_norm": 0.39529040455818176, "learning_rate": 3.5418078888229166e-05, "loss": 2.7674, "step": 7496 }, { "epoch": 0.6147476300281834, "grad_norm": 0.41775986552238464, "learning_rate": 3.539214659536053e-05, "loss": 2.7501, "step": 7498 }, { "epoch": 0.6149116064565718, "grad_norm": 0.37571340799331665, "learning_rate": 3.536621859789685e-05, "loss": 2.7063, "step": 7500 }, { "epoch": 0.6150755828849603, "grad_norm": 0.37494418025016785, "learning_rate": 3.534029490346217e-05, "loss": 2.7466, "step": 7502 }, { "epoch": 0.6152395593133487, "grad_norm": 0.396066278219223, "learning_rate": 3.5314375519679345e-05, "loss": 2.7053, "step": 7504 }, { "epoch": 0.6154035357417371, "grad_norm": 0.4173499345779419, "learning_rate": 3.528846045416987e-05, "loss": 2.7273, "step": 7506 }, { "epoch": 0.6155675121701255, "grad_norm": 0.41211748123168945, "learning_rate": 3.5262549714554e-05, "loss": 2.6959, "step": 7508 }, { "epoch": 0.615731488598514, "grad_norm": 0.3835853934288025, "learning_rate": 3.523664330845077e-05, "loss": 2.7235, "step": 7510 }, { "epoch": 0.6158954650269024, "grad_norm": 0.3758532702922821, "learning_rate": 3.5210741243477876e-05, "loss": 2.6909, "step": 7512 }, { "epoch": 0.6160594414552908, "grad_norm": 0.3795211613178253, "learning_rate": 3.5184843527251745e-05, "loss": 2.7192, "step": 7514 }, { "epoch": 0.6162234178836792, "grad_norm": 0.3767675459384918, "learning_rate": 3.5158950167387564e-05, "loss": 2.7601, "step": 7516 }, { "epoch": 0.6163873943120677, "grad_norm": 0.39331692457199097, "learning_rate": 3.513306117149919e-05, "loss": 2.7386, "step": 7518 }, { "epoch": 0.6165513707404561, "grad_norm": 0.408627986907959, "learning_rate": 3.5107176547199224e-05, "loss": 2.6876, "step": 7520 }, { "epoch": 0.6167153471688445, "grad_norm": 0.4065479636192322, "learning_rate": 3.508129630209901e-05, "loss": 2.7143, "step": 7522 }, { "epoch": 0.6168793235972329, "grad_norm": 0.38085782527923584, "learning_rate": 3.505542044380855e-05, "loss": 2.7079, "step": 7524 }, { "epoch": 0.6170433000256214, "grad_norm": 0.39370179176330566, "learning_rate": 3.5029548979936556e-05, "loss": 2.7574, "step": 7526 }, { "epoch": 0.6172072764540097, "grad_norm": 0.3861037790775299, "learning_rate": 3.5003681918090514e-05, "loss": 2.7286, "step": 7528 }, { "epoch": 0.6173712528823981, "grad_norm": 0.3807239830493927, "learning_rate": 3.497781926587657e-05, "loss": 2.7274, "step": 7530 }, { "epoch": 0.6175352293107865, "grad_norm": 0.419541597366333, "learning_rate": 3.495196103089953e-05, "loss": 2.7384, "step": 7532 }, { "epoch": 0.617699205739175, "grad_norm": 0.415090411901474, "learning_rate": 3.492610722076299e-05, "loss": 2.7689, "step": 7534 }, { "epoch": 0.6178631821675634, "grad_norm": 0.4102879762649536, "learning_rate": 3.49002578430692e-05, "loss": 2.717, "step": 7536 }, { "epoch": 0.6180271585959518, "grad_norm": 0.38376542925834656, "learning_rate": 3.487441290541909e-05, "loss": 2.7256, "step": 7538 }, { "epoch": 0.6181911350243402, "grad_norm": 0.3987720012664795, "learning_rate": 3.484857241541232e-05, "loss": 2.7632, "step": 7540 }, { "epoch": 0.6183551114527287, "grad_norm": 0.3795928657054901, "learning_rate": 3.4822736380647214e-05, "loss": 2.7228, "step": 7542 }, { "epoch": 0.6185190878811171, "grad_norm": 0.3663689196109772, "learning_rate": 3.479690480872079e-05, "loss": 2.7262, "step": 7544 }, { "epoch": 0.6186830643095055, "grad_norm": 0.40121135115623474, "learning_rate": 3.4771077707228784e-05, "loss": 2.7431, "step": 7546 }, { "epoch": 0.618847040737894, "grad_norm": 0.37954187393188477, "learning_rate": 3.474525508376558e-05, "loss": 2.7436, "step": 7548 }, { "epoch": 0.6190110171662824, "grad_norm": 0.4009801745414734, "learning_rate": 3.471943694592425e-05, "loss": 2.7057, "step": 7550 }, { "epoch": 0.6191749935946708, "grad_norm": 0.38822048902511597, "learning_rate": 3.4693623301296584e-05, "loss": 2.7042, "step": 7552 }, { "epoch": 0.6193389700230592, "grad_norm": 0.36688941717147827, "learning_rate": 3.466781415747301e-05, "loss": 2.7661, "step": 7554 }, { "epoch": 0.6195029464514477, "grad_norm": 0.3665444552898407, "learning_rate": 3.464200952204264e-05, "loss": 2.6979, "step": 7556 }, { "epoch": 0.619666922879836, "grad_norm": 0.378578245639801, "learning_rate": 3.461620940259327e-05, "loss": 2.7311, "step": 7558 }, { "epoch": 0.6198308993082244, "grad_norm": 0.37778717279434204, "learning_rate": 3.459041380671136e-05, "loss": 2.7344, "step": 7560 }, { "epoch": 0.6199948757366128, "grad_norm": 0.3607989549636841, "learning_rate": 3.456462274198205e-05, "loss": 2.7406, "step": 7562 }, { "epoch": 0.6201588521650013, "grad_norm": 0.39484044909477234, "learning_rate": 3.453883621598915e-05, "loss": 2.7612, "step": 7564 }, { "epoch": 0.6203228285933897, "grad_norm": 0.3921862542629242, "learning_rate": 3.4513054236315125e-05, "loss": 2.7216, "step": 7566 }, { "epoch": 0.6204868050217781, "grad_norm": 0.39838817715644836, "learning_rate": 3.448727681054107e-05, "loss": 2.7325, "step": 7568 }, { "epoch": 0.6206507814501665, "grad_norm": 0.4075007438659668, "learning_rate": 3.4461503946246835e-05, "loss": 2.73, "step": 7570 }, { "epoch": 0.620814757878555, "grad_norm": 0.3774442970752716, "learning_rate": 3.4435735651010836e-05, "loss": 2.7211, "step": 7572 }, { "epoch": 0.6209787343069434, "grad_norm": 0.3737916052341461, "learning_rate": 3.440997193241016e-05, "loss": 2.7158, "step": 7574 }, { "epoch": 0.6211427107353318, "grad_norm": 0.377712607383728, "learning_rate": 3.438421279802062e-05, "loss": 2.7244, "step": 7576 }, { "epoch": 0.6213066871637202, "grad_norm": 0.38349977135658264, "learning_rate": 3.4358458255416603e-05, "loss": 2.7893, "step": 7578 }, { "epoch": 0.6214706635921087, "grad_norm": 0.39397573471069336, "learning_rate": 3.433270831217116e-05, "loss": 2.6496, "step": 7580 }, { "epoch": 0.6216346400204971, "grad_norm": 0.40148624777793884, "learning_rate": 3.430696297585602e-05, "loss": 2.6999, "step": 7582 }, { "epoch": 0.6217986164488855, "grad_norm": 0.38889259099960327, "learning_rate": 3.4281222254041525e-05, "loss": 2.8026, "step": 7584 }, { "epoch": 0.6219625928772738, "grad_norm": 0.3647606670856476, "learning_rate": 3.425548615429666e-05, "loss": 2.7333, "step": 7586 }, { "epoch": 0.6221265693056623, "grad_norm": 0.3934035301208496, "learning_rate": 3.422975468418911e-05, "loss": 2.7353, "step": 7588 }, { "epoch": 0.6222905457340507, "grad_norm": 0.37733393907546997, "learning_rate": 3.4204027851285114e-05, "loss": 2.7875, "step": 7590 }, { "epoch": 0.6224545221624391, "grad_norm": 0.38836774230003357, "learning_rate": 3.417830566314959e-05, "loss": 2.6995, "step": 7592 }, { "epoch": 0.6226184985908275, "grad_norm": 0.40056899189949036, "learning_rate": 3.415258812734612e-05, "loss": 2.7682, "step": 7594 }, { "epoch": 0.622782475019216, "grad_norm": 0.3677942454814911, "learning_rate": 3.412687525143685e-05, "loss": 2.6626, "step": 7596 }, { "epoch": 0.6229464514476044, "grad_norm": 0.3932241201400757, "learning_rate": 3.410116704298259e-05, "loss": 2.7325, "step": 7598 }, { "epoch": 0.6231104278759928, "grad_norm": 0.39545226097106934, "learning_rate": 3.407546350954281e-05, "loss": 2.7212, "step": 7600 }, { "epoch": 0.6232744043043812, "grad_norm": 0.3952122628688812, "learning_rate": 3.4049764658675556e-05, "loss": 2.7139, "step": 7602 }, { "epoch": 0.6234383807327697, "grad_norm": 0.380193829536438, "learning_rate": 3.4024070497937496e-05, "loss": 2.6838, "step": 7604 }, { "epoch": 0.6236023571611581, "grad_norm": 0.3770444989204407, "learning_rate": 3.399838103488397e-05, "loss": 2.7072, "step": 7606 }, { "epoch": 0.6237663335895465, "grad_norm": 0.3546270430088043, "learning_rate": 3.3972696277068885e-05, "loss": 2.7474, "step": 7608 }, { "epoch": 0.623930310017935, "grad_norm": 0.3864762485027313, "learning_rate": 3.394701623204477e-05, "loss": 2.706, "step": 7610 }, { "epoch": 0.6240942864463234, "grad_norm": 0.3852376937866211, "learning_rate": 3.392134090736282e-05, "loss": 2.6709, "step": 7612 }, { "epoch": 0.6242582628747118, "grad_norm": 0.3920847773551941, "learning_rate": 3.389567031057278e-05, "loss": 2.7288, "step": 7614 }, { "epoch": 0.6244222393031001, "grad_norm": 0.39720067381858826, "learning_rate": 3.387000444922301e-05, "loss": 2.7414, "step": 7616 }, { "epoch": 0.6245862157314886, "grad_norm": 0.384258508682251, "learning_rate": 3.384434333086054e-05, "loss": 2.6749, "step": 7618 }, { "epoch": 0.624750192159877, "grad_norm": 0.37747853994369507, "learning_rate": 3.381868696303094e-05, "loss": 2.7169, "step": 7620 }, { "epoch": 0.6249141685882654, "grad_norm": 0.38757556676864624, "learning_rate": 3.379303535327838e-05, "loss": 2.7312, "step": 7622 }, { "epoch": 0.6250781450166538, "grad_norm": 0.3916012942790985, "learning_rate": 3.376738850914571e-05, "loss": 2.7481, "step": 7624 }, { "epoch": 0.6252421214450423, "grad_norm": 0.37376511096954346, "learning_rate": 3.374174643817428e-05, "loss": 2.711, "step": 7626 }, { "epoch": 0.6254060978734307, "grad_norm": 0.39975661039352417, "learning_rate": 3.3716109147904094e-05, "loss": 2.7318, "step": 7628 }, { "epoch": 0.6255700743018191, "grad_norm": 0.41789987683296204, "learning_rate": 3.369047664587375e-05, "loss": 2.7321, "step": 7630 }, { "epoch": 0.6257340507302075, "grad_norm": 0.39743414521217346, "learning_rate": 3.3664848939620406e-05, "loss": 2.7413, "step": 7632 }, { "epoch": 0.625898027158596, "grad_norm": 0.3793678879737854, "learning_rate": 3.363922603667982e-05, "loss": 2.6591, "step": 7634 }, { "epoch": 0.6260620035869844, "grad_norm": 0.3701624274253845, "learning_rate": 3.3613607944586374e-05, "loss": 2.7254, "step": 7636 }, { "epoch": 0.6262259800153728, "grad_norm": 0.3606198728084564, "learning_rate": 3.3587994670873e-05, "loss": 2.7231, "step": 7638 }, { "epoch": 0.6263899564437612, "grad_norm": 0.376678466796875, "learning_rate": 3.356238622307119e-05, "loss": 2.7302, "step": 7640 }, { "epoch": 0.6265539328721497, "grad_norm": 0.4293898046016693, "learning_rate": 3.3536782608711105e-05, "loss": 2.6861, "step": 7642 }, { "epoch": 0.6267179093005381, "grad_norm": 0.4140755236148834, "learning_rate": 3.3511183835321394e-05, "loss": 2.694, "step": 7644 }, { "epoch": 0.6268818857289264, "grad_norm": 0.41748470067977905, "learning_rate": 3.348558991042929e-05, "loss": 2.7329, "step": 7646 }, { "epoch": 0.6270458621573148, "grad_norm": 0.3913920819759369, "learning_rate": 3.346000084156069e-05, "loss": 2.7399, "step": 7648 }, { "epoch": 0.6272098385857033, "grad_norm": 0.38955986499786377, "learning_rate": 3.343441663623995e-05, "loss": 2.6881, "step": 7650 }, { "epoch": 0.6273738150140917, "grad_norm": 0.38670194149017334, "learning_rate": 3.340883730199006e-05, "loss": 2.712, "step": 7652 }, { "epoch": 0.6275377914424801, "grad_norm": 0.38310298323631287, "learning_rate": 3.338326284633257e-05, "loss": 2.7457, "step": 7654 }, { "epoch": 0.6277017678708685, "grad_norm": 0.3795510232448578, "learning_rate": 3.335769327678759e-05, "loss": 2.748, "step": 7656 }, { "epoch": 0.627865744299257, "grad_norm": 0.37910598516464233, "learning_rate": 3.333212860087375e-05, "loss": 2.751, "step": 7658 }, { "epoch": 0.6280297207276454, "grad_norm": 0.37041279673576355, "learning_rate": 3.330656882610835e-05, "loss": 2.7115, "step": 7660 }, { "epoch": 0.6281936971560338, "grad_norm": 0.37564611434936523, "learning_rate": 3.328101396000714e-05, "loss": 2.6771, "step": 7662 }, { "epoch": 0.6283576735844223, "grad_norm": 0.37373894453048706, "learning_rate": 3.325546401008446e-05, "loss": 2.7083, "step": 7664 }, { "epoch": 0.6285216500128107, "grad_norm": 0.41689133644104004, "learning_rate": 3.3229918983853245e-05, "loss": 2.7567, "step": 7666 }, { "epoch": 0.6286856264411991, "grad_norm": 0.3869902193546295, "learning_rate": 3.3204378888824926e-05, "loss": 2.7269, "step": 7668 }, { "epoch": 0.6288496028695875, "grad_norm": 0.3627869188785553, "learning_rate": 3.317884373250953e-05, "loss": 2.7304, "step": 7670 }, { "epoch": 0.629013579297976, "grad_norm": 0.372374027967453, "learning_rate": 3.315331352241559e-05, "loss": 2.6885, "step": 7672 }, { "epoch": 0.6291775557263644, "grad_norm": 0.40186235308647156, "learning_rate": 3.3127788266050195e-05, "loss": 2.7233, "step": 7674 }, { "epoch": 0.6293415321547527, "grad_norm": 0.4125675857067108, "learning_rate": 3.3102267970919023e-05, "loss": 2.7048, "step": 7676 }, { "epoch": 0.6295055085831411, "grad_norm": 0.3935837745666504, "learning_rate": 3.307675264452624e-05, "loss": 2.7096, "step": 7678 }, { "epoch": 0.6296694850115296, "grad_norm": 0.4198305904865265, "learning_rate": 3.305124229437453e-05, "loss": 2.7227, "step": 7680 }, { "epoch": 0.629833461439918, "grad_norm": 0.3819190561771393, "learning_rate": 3.302573692796522e-05, "loss": 2.735, "step": 7682 }, { "epoch": 0.6299974378683064, "grad_norm": 0.38034242391586304, "learning_rate": 3.300023655279806e-05, "loss": 2.7015, "step": 7684 }, { "epoch": 0.6301614142966948, "grad_norm": 0.37957313656806946, "learning_rate": 3.297474117637136e-05, "loss": 2.6697, "step": 7686 }, { "epoch": 0.6303253907250833, "grad_norm": 0.3742494285106659, "learning_rate": 3.294925080618202e-05, "loss": 2.7666, "step": 7688 }, { "epoch": 0.6304893671534717, "grad_norm": 0.3573929965496063, "learning_rate": 3.292376544972541e-05, "loss": 2.729, "step": 7690 }, { "epoch": 0.6306533435818601, "grad_norm": 0.370976060628891, "learning_rate": 3.289828511449541e-05, "loss": 2.7811, "step": 7692 }, { "epoch": 0.6308173200102485, "grad_norm": 0.40317147970199585, "learning_rate": 3.28728098079845e-05, "loss": 2.7098, "step": 7694 }, { "epoch": 0.630981296438637, "grad_norm": 0.36356374621391296, "learning_rate": 3.28473395376836e-05, "loss": 2.7561, "step": 7696 }, { "epoch": 0.6311452728670254, "grad_norm": 0.4060159921646118, "learning_rate": 3.282187431108216e-05, "loss": 2.727, "step": 7698 }, { "epoch": 0.6313092492954138, "grad_norm": 0.38442477583885193, "learning_rate": 3.279641413566823e-05, "loss": 2.7332, "step": 7700 }, { "epoch": 0.6314732257238022, "grad_norm": 0.3743780851364136, "learning_rate": 3.2770959018928296e-05, "loss": 2.6629, "step": 7702 }, { "epoch": 0.6316372021521907, "grad_norm": 0.3966967463493347, "learning_rate": 3.2745508968347325e-05, "loss": 2.7197, "step": 7704 }, { "epoch": 0.631801178580579, "grad_norm": 0.37655702233314514, "learning_rate": 3.2720063991408924e-05, "loss": 2.6881, "step": 7706 }, { "epoch": 0.6319651550089674, "grad_norm": 0.3916068375110626, "learning_rate": 3.2694624095595084e-05, "loss": 2.7066, "step": 7708 }, { "epoch": 0.6321291314373558, "grad_norm": 0.3936549425125122, "learning_rate": 3.2669189288386325e-05, "loss": 2.7219, "step": 7710 }, { "epoch": 0.6322931078657443, "grad_norm": 0.3706667423248291, "learning_rate": 3.2643759577261744e-05, "loss": 2.6886, "step": 7712 }, { "epoch": 0.6324570842941327, "grad_norm": 0.3641817271709442, "learning_rate": 3.261833496969886e-05, "loss": 2.7684, "step": 7714 }, { "epoch": 0.6326210607225211, "grad_norm": 0.3743795156478882, "learning_rate": 3.259291547317372e-05, "loss": 2.6686, "step": 7716 }, { "epoch": 0.6327850371509095, "grad_norm": 0.3651292324066162, "learning_rate": 3.256750109516087e-05, "loss": 2.7053, "step": 7718 }, { "epoch": 0.632949013579298, "grad_norm": 0.3830392062664032, "learning_rate": 3.254209184313336e-05, "loss": 2.7814, "step": 7720 }, { "epoch": 0.6331129900076864, "grad_norm": 0.3942374587059021, "learning_rate": 3.251668772456268e-05, "loss": 2.6528, "step": 7722 }, { "epoch": 0.6332769664360748, "grad_norm": 0.41045206785202026, "learning_rate": 3.2491288746918903e-05, "loss": 2.7269, "step": 7724 }, { "epoch": 0.6334409428644633, "grad_norm": 0.41689518094062805, "learning_rate": 3.246589491767051e-05, "loss": 2.6991, "step": 7726 }, { "epoch": 0.6336049192928517, "grad_norm": 0.39930638670921326, "learning_rate": 3.2440506244284484e-05, "loss": 2.7544, "step": 7728 }, { "epoch": 0.6337688957212401, "grad_norm": 0.42437562346458435, "learning_rate": 3.241512273422635e-05, "loss": 2.7116, "step": 7730 }, { "epoch": 0.6339328721496285, "grad_norm": 0.42422398924827576, "learning_rate": 3.238974439496003e-05, "loss": 2.6951, "step": 7732 }, { "epoch": 0.634096848578017, "grad_norm": 0.42158007621765137, "learning_rate": 3.2364371233947964e-05, "loss": 2.7489, "step": 7734 }, { "epoch": 0.6342608250064053, "grad_norm": 0.3969306945800781, "learning_rate": 3.2339003258651104e-05, "loss": 2.7346, "step": 7736 }, { "epoch": 0.6344248014347937, "grad_norm": 0.40741413831710815, "learning_rate": 3.231364047652882e-05, "loss": 2.7646, "step": 7738 }, { "epoch": 0.6345887778631821, "grad_norm": 0.3900740146636963, "learning_rate": 3.228828289503898e-05, "loss": 2.7345, "step": 7740 }, { "epoch": 0.6347527542915706, "grad_norm": 0.3706361651420593, "learning_rate": 3.226293052163792e-05, "loss": 2.6459, "step": 7742 }, { "epoch": 0.634916730719959, "grad_norm": 0.38874951004981995, "learning_rate": 3.223758336378046e-05, "loss": 2.6441, "step": 7744 }, { "epoch": 0.6350807071483474, "grad_norm": 0.40067705512046814, "learning_rate": 3.2212241428919834e-05, "loss": 2.7441, "step": 7746 }, { "epoch": 0.6352446835767358, "grad_norm": 0.3729390501976013, "learning_rate": 3.2186904724507835e-05, "loss": 2.6632, "step": 7748 }, { "epoch": 0.6354086600051243, "grad_norm": 0.3921566605567932, "learning_rate": 3.216157325799463e-05, "loss": 2.7508, "step": 7750 }, { "epoch": 0.6355726364335127, "grad_norm": 0.39209845662117004, "learning_rate": 3.213624703682885e-05, "loss": 2.7631, "step": 7752 }, { "epoch": 0.6357366128619011, "grad_norm": 0.41270387172698975, "learning_rate": 3.211092606845766e-05, "loss": 2.6897, "step": 7754 }, { "epoch": 0.6359005892902895, "grad_norm": 0.4053170084953308, "learning_rate": 3.2085610360326614e-05, "loss": 2.6658, "step": 7756 }, { "epoch": 0.636064565718678, "grad_norm": 0.38600656390190125, "learning_rate": 3.206029991987972e-05, "loss": 2.7244, "step": 7758 }, { "epoch": 0.6362285421470664, "grad_norm": 0.4255266785621643, "learning_rate": 3.203499475455948e-05, "loss": 2.6988, "step": 7760 }, { "epoch": 0.6363925185754548, "grad_norm": 0.3940472900867462, "learning_rate": 3.200969487180681e-05, "loss": 2.7547, "step": 7762 }, { "epoch": 0.6365564950038431, "grad_norm": 0.4074536859989166, "learning_rate": 3.1984400279061054e-05, "loss": 2.737, "step": 7764 }, { "epoch": 0.6367204714322316, "grad_norm": 0.3944758176803589, "learning_rate": 3.195911098376007e-05, "loss": 2.7087, "step": 7766 }, { "epoch": 0.63688444786062, "grad_norm": 0.4005754888057709, "learning_rate": 3.19338269933401e-05, "loss": 2.738, "step": 7768 }, { "epoch": 0.6370484242890084, "grad_norm": 0.3666488230228424, "learning_rate": 3.1908548315235806e-05, "loss": 2.6789, "step": 7770 }, { "epoch": 0.6372124007173968, "grad_norm": 0.3705459237098694, "learning_rate": 3.1883274956880384e-05, "loss": 2.6495, "step": 7772 }, { "epoch": 0.6373763771457853, "grad_norm": 0.37399980425834656, "learning_rate": 3.1858006925705376e-05, "loss": 2.699, "step": 7774 }, { "epoch": 0.6375403535741737, "grad_norm": 0.3848254382610321, "learning_rate": 3.183274422914077e-05, "loss": 2.6965, "step": 7776 }, { "epoch": 0.6377043300025621, "grad_norm": 0.3763097822666168, "learning_rate": 3.1807486874615035e-05, "loss": 2.7206, "step": 7778 }, { "epoch": 0.6378683064309506, "grad_norm": 0.3757327198982239, "learning_rate": 3.1782234869555027e-05, "loss": 2.6724, "step": 7780 }, { "epoch": 0.638032282859339, "grad_norm": 0.3919960856437683, "learning_rate": 3.175698822138602e-05, "loss": 2.6877, "step": 7782 }, { "epoch": 0.6381962592877274, "grad_norm": 0.37129318714141846, "learning_rate": 3.173174693753175e-05, "loss": 2.6874, "step": 7784 }, { "epoch": 0.6383602357161158, "grad_norm": 0.4006279408931732, "learning_rate": 3.1706511025414365e-05, "loss": 2.6892, "step": 7786 }, { "epoch": 0.6385242121445043, "grad_norm": 0.37489134073257446, "learning_rate": 3.16812804924544e-05, "loss": 2.7256, "step": 7788 }, { "epoch": 0.6386881885728927, "grad_norm": 0.3848879933357239, "learning_rate": 3.1656055346070856e-05, "loss": 2.7451, "step": 7790 }, { "epoch": 0.6388521650012811, "grad_norm": 0.38539236783981323, "learning_rate": 3.163083559368113e-05, "loss": 2.7099, "step": 7792 }, { "epoch": 0.6390161414296694, "grad_norm": 0.3892047703266144, "learning_rate": 3.1605621242700986e-05, "loss": 2.7378, "step": 7794 }, { "epoch": 0.639180117858058, "grad_norm": 0.38269326090812683, "learning_rate": 3.158041230054471e-05, "loss": 2.7244, "step": 7796 }, { "epoch": 0.6393440942864463, "grad_norm": 0.43198147416114807, "learning_rate": 3.15552087746249e-05, "loss": 2.7968, "step": 7798 }, { "epoch": 0.6395080707148347, "grad_norm": 0.4099862277507782, "learning_rate": 3.1530010672352573e-05, "loss": 2.6968, "step": 7800 }, { "epoch": 0.6396720471432231, "grad_norm": 0.388921320438385, "learning_rate": 3.150481800113721e-05, "loss": 2.6984, "step": 7802 }, { "epoch": 0.6398360235716116, "grad_norm": 0.37909209728240967, "learning_rate": 3.147963076838665e-05, "loss": 2.7149, "step": 7804 }, { "epoch": 0.64, "grad_norm": 0.4047726094722748, "learning_rate": 3.1454448981507105e-05, "loss": 2.7068, "step": 7806 }, { "epoch": 0.6401639764283884, "grad_norm": 0.3683038353919983, "learning_rate": 3.142927264790327e-05, "loss": 2.7194, "step": 7808 }, { "epoch": 0.6403279528567768, "grad_norm": 0.3726213872432709, "learning_rate": 3.140410177497815e-05, "loss": 2.7206, "step": 7810 }, { "epoch": 0.6404919292851653, "grad_norm": 0.41055673360824585, "learning_rate": 3.137893637013318e-05, "loss": 2.6806, "step": 7812 }, { "epoch": 0.6406559057135537, "grad_norm": 0.42934727668762207, "learning_rate": 3.135377644076822e-05, "loss": 2.7264, "step": 7814 }, { "epoch": 0.6408198821419421, "grad_norm": 0.3882001042366028, "learning_rate": 3.132862199428147e-05, "loss": 2.6832, "step": 7816 }, { "epoch": 0.6409838585703305, "grad_norm": 0.3750775456428528, "learning_rate": 3.130347303806952e-05, "loss": 2.7212, "step": 7818 }, { "epoch": 0.641147834998719, "grad_norm": 0.36462724208831787, "learning_rate": 3.127832957952739e-05, "loss": 2.7495, "step": 7820 }, { "epoch": 0.6413118114271074, "grad_norm": 0.37588170170783997, "learning_rate": 3.125319162604844e-05, "loss": 2.6972, "step": 7822 }, { "epoch": 0.6414757878554957, "grad_norm": 0.3652655780315399, "learning_rate": 3.1228059185024415e-05, "loss": 2.7406, "step": 7824 }, { "epoch": 0.6416397642838841, "grad_norm": 0.3910474181175232, "learning_rate": 3.120293226384548e-05, "loss": 2.712, "step": 7826 }, { "epoch": 0.6418037407122726, "grad_norm": 0.35795921087265015, "learning_rate": 3.1177810869900125e-05, "loss": 2.7015, "step": 7828 }, { "epoch": 0.641967717140661, "grad_norm": 0.385695219039917, "learning_rate": 3.115269501057523e-05, "loss": 2.6667, "step": 7830 }, { "epoch": 0.6421316935690494, "grad_norm": 0.3897539973258972, "learning_rate": 3.112758469325609e-05, "loss": 2.7592, "step": 7832 }, { "epoch": 0.6422956699974379, "grad_norm": 0.3896655738353729, "learning_rate": 3.11024799253263e-05, "loss": 2.646, "step": 7834 }, { "epoch": 0.6424596464258263, "grad_norm": 0.36518627405166626, "learning_rate": 3.107738071416785e-05, "loss": 2.7515, "step": 7836 }, { "epoch": 0.6426236228542147, "grad_norm": 0.3937589228153229, "learning_rate": 3.1052287067161146e-05, "loss": 2.7264, "step": 7838 }, { "epoch": 0.6427875992826031, "grad_norm": 0.38585948944091797, "learning_rate": 3.102719899168489e-05, "loss": 2.6811, "step": 7840 }, { "epoch": 0.6429515757109916, "grad_norm": 0.427311509847641, "learning_rate": 3.100211649511615e-05, "loss": 2.7838, "step": 7842 }, { "epoch": 0.64311555213938, "grad_norm": 0.4227888882160187, "learning_rate": 3.097703958483043e-05, "loss": 2.6755, "step": 7844 }, { "epoch": 0.6432795285677684, "grad_norm": 0.3813656270503998, "learning_rate": 3.09519682682015e-05, "loss": 2.7564, "step": 7846 }, { "epoch": 0.6434435049961568, "grad_norm": 0.3923702836036682, "learning_rate": 3.092690255260151e-05, "loss": 2.7213, "step": 7848 }, { "epoch": 0.6436074814245453, "grad_norm": 0.3898935317993164, "learning_rate": 3.090184244540101e-05, "loss": 2.7167, "step": 7850 }, { "epoch": 0.6437714578529337, "grad_norm": 0.4096493124961853, "learning_rate": 3.087678795396886e-05, "loss": 2.7552, "step": 7852 }, { "epoch": 0.643935434281322, "grad_norm": 0.3937073349952698, "learning_rate": 3.085173908567224e-05, "loss": 2.7003, "step": 7854 }, { "epoch": 0.6440994107097104, "grad_norm": 0.37011292576789856, "learning_rate": 3.082669584787674e-05, "loss": 2.7193, "step": 7856 }, { "epoch": 0.6442633871380989, "grad_norm": 0.38424769043922424, "learning_rate": 3.080165824794627e-05, "loss": 2.6545, "step": 7858 }, { "epoch": 0.6444273635664873, "grad_norm": 0.3760468363761902, "learning_rate": 3.077662629324304e-05, "loss": 2.6959, "step": 7860 }, { "epoch": 0.6445913399948757, "grad_norm": 0.3764878511428833, "learning_rate": 3.075159999112769e-05, "loss": 2.6651, "step": 7862 }, { "epoch": 0.6447553164232641, "grad_norm": 0.3846738934516907, "learning_rate": 3.07265793489591e-05, "loss": 2.6702, "step": 7864 }, { "epoch": 0.6449192928516526, "grad_norm": 0.3727152347564697, "learning_rate": 3.0701564374094546e-05, "loss": 2.681, "step": 7866 }, { "epoch": 0.645083269280041, "grad_norm": 0.3666906952857971, "learning_rate": 3.067655507388965e-05, "loss": 2.7018, "step": 7868 }, { "epoch": 0.6452472457084294, "grad_norm": 0.37696129083633423, "learning_rate": 3.0651551455698314e-05, "loss": 2.7595, "step": 7870 }, { "epoch": 0.6454112221368178, "grad_norm": 0.4079241454601288, "learning_rate": 3.062655352687276e-05, "loss": 2.7567, "step": 7872 }, { "epoch": 0.6455751985652063, "grad_norm": 0.39961734414100647, "learning_rate": 3.060156129476364e-05, "loss": 2.7478, "step": 7874 }, { "epoch": 0.6457391749935947, "grad_norm": 0.3890273869037628, "learning_rate": 3.0576574766719814e-05, "loss": 2.7414, "step": 7876 }, { "epoch": 0.6459031514219831, "grad_norm": 0.3946554660797119, "learning_rate": 3.055159395008851e-05, "loss": 2.7703, "step": 7878 }, { "epoch": 0.6460671278503715, "grad_norm": 0.4062712490558624, "learning_rate": 3.052661885221531e-05, "loss": 2.7585, "step": 7880 }, { "epoch": 0.64623110427876, "grad_norm": 0.3980555832386017, "learning_rate": 3.050164948044406e-05, "loss": 2.6951, "step": 7882 }, { "epoch": 0.6463950807071484, "grad_norm": 0.4146850109100342, "learning_rate": 3.0476685842116932e-05, "loss": 2.731, "step": 7884 }, { "epoch": 0.6465590571355367, "grad_norm": 0.4119962453842163, "learning_rate": 3.045172794457446e-05, "loss": 2.6566, "step": 7886 }, { "epoch": 0.6467230335639251, "grad_norm": 0.40596380829811096, "learning_rate": 3.042677579515544e-05, "loss": 2.7325, "step": 7888 }, { "epoch": 0.6468870099923136, "grad_norm": 0.37548547983169556, "learning_rate": 3.0401829401196963e-05, "loss": 2.6471, "step": 7890 }, { "epoch": 0.647050986420702, "grad_norm": 0.3889716565608978, "learning_rate": 3.0376888770034506e-05, "loss": 2.6794, "step": 7892 }, { "epoch": 0.6472149628490904, "grad_norm": 0.4179765284061432, "learning_rate": 3.0351953909001784e-05, "loss": 2.6474, "step": 7894 }, { "epoch": 0.6473789392774789, "grad_norm": 0.39606013894081116, "learning_rate": 3.0327024825430827e-05, "loss": 2.7276, "step": 7896 }, { "epoch": 0.6475429157058673, "grad_norm": 0.36344513297080994, "learning_rate": 3.0302101526651973e-05, "loss": 2.7069, "step": 7898 }, { "epoch": 0.6477068921342557, "grad_norm": 0.364402711391449, "learning_rate": 3.0277184019993876e-05, "loss": 2.622, "step": 7900 }, { "epoch": 0.6478708685626441, "grad_norm": 0.37299656867980957, "learning_rate": 3.0252272312783448e-05, "loss": 2.7014, "step": 7902 }, { "epoch": 0.6480348449910326, "grad_norm": 0.3982824385166168, "learning_rate": 3.0227366412345937e-05, "loss": 2.7368, "step": 7904 }, { "epoch": 0.648198821419421, "grad_norm": 0.4151482582092285, "learning_rate": 3.0202466326004863e-05, "loss": 2.7334, "step": 7906 }, { "epoch": 0.6483627978478094, "grad_norm": 0.40278491377830505, "learning_rate": 3.0177572061082003e-05, "loss": 2.6538, "step": 7908 }, { "epoch": 0.6485267742761978, "grad_norm": 0.3986111581325531, "learning_rate": 3.0152683624897514e-05, "loss": 2.6906, "step": 7910 }, { "epoch": 0.6486907507045863, "grad_norm": 0.3914695084095001, "learning_rate": 3.0127801024769746e-05, "loss": 2.695, "step": 7912 }, { "epoch": 0.6488547271329747, "grad_norm": 0.39455148577690125, "learning_rate": 3.0102924268015365e-05, "loss": 2.7245, "step": 7914 }, { "epoch": 0.649018703561363, "grad_norm": 0.3723244369029999, "learning_rate": 3.0078053361949342e-05, "loss": 2.7177, "step": 7916 }, { "epoch": 0.6491826799897514, "grad_norm": 0.4205113351345062, "learning_rate": 3.005318831388491e-05, "loss": 2.71, "step": 7918 }, { "epoch": 0.6493466564181399, "grad_norm": 0.373691588640213, "learning_rate": 3.002832913113356e-05, "loss": 2.7145, "step": 7920 }, { "epoch": 0.6495106328465283, "grad_norm": 0.3899837136268616, "learning_rate": 3.0003475821005078e-05, "loss": 2.7439, "step": 7922 }, { "epoch": 0.6496746092749167, "grad_norm": 0.4084588587284088, "learning_rate": 2.997862839080755e-05, "loss": 2.7286, "step": 7924 }, { "epoch": 0.6498385857033051, "grad_norm": 0.3797529935836792, "learning_rate": 2.9953786847847264e-05, "loss": 2.6866, "step": 7926 }, { "epoch": 0.6500025621316936, "grad_norm": 0.417545884847641, "learning_rate": 2.9928951199428846e-05, "loss": 2.7426, "step": 7928 }, { "epoch": 0.650166538560082, "grad_norm": 0.37970849871635437, "learning_rate": 2.9904121452855155e-05, "loss": 2.6708, "step": 7930 }, { "epoch": 0.6503305149884704, "grad_norm": 0.3815860450267792, "learning_rate": 2.9879297615427294e-05, "loss": 2.7971, "step": 7932 }, { "epoch": 0.6504944914168588, "grad_norm": 0.37229830026626587, "learning_rate": 2.985447969444469e-05, "loss": 2.7025, "step": 7934 }, { "epoch": 0.6506584678452473, "grad_norm": 0.35849589109420776, "learning_rate": 2.982966769720499e-05, "loss": 2.7276, "step": 7936 }, { "epoch": 0.6508224442736357, "grad_norm": 0.3986893892288208, "learning_rate": 2.9804861631004065e-05, "loss": 2.7355, "step": 7938 }, { "epoch": 0.6509864207020241, "grad_norm": 0.40123265981674194, "learning_rate": 2.9780061503136124e-05, "loss": 2.6626, "step": 7940 }, { "epoch": 0.6511503971304125, "grad_norm": 0.3948691487312317, "learning_rate": 2.9755267320893576e-05, "loss": 2.693, "step": 7942 }, { "epoch": 0.651314373558801, "grad_norm": 0.36041921377182007, "learning_rate": 2.9730479091567082e-05, "loss": 2.7281, "step": 7944 }, { "epoch": 0.6514783499871893, "grad_norm": 0.4022723138332367, "learning_rate": 2.970569682244557e-05, "loss": 2.6919, "step": 7946 }, { "epoch": 0.6516423264155777, "grad_norm": 0.40095844864845276, "learning_rate": 2.9680920520816202e-05, "loss": 2.7113, "step": 7948 }, { "epoch": 0.6518063028439662, "grad_norm": 0.3731570541858673, "learning_rate": 2.9656150193964384e-05, "loss": 2.701, "step": 7950 }, { "epoch": 0.6519702792723546, "grad_norm": 0.37078115344047546, "learning_rate": 2.9631385849173797e-05, "loss": 2.7549, "step": 7952 }, { "epoch": 0.652134255700743, "grad_norm": 0.3733912706375122, "learning_rate": 2.960662749372633e-05, "loss": 2.7065, "step": 7954 }, { "epoch": 0.6522982321291314, "grad_norm": 0.38202810287475586, "learning_rate": 2.9581875134902093e-05, "loss": 2.7198, "step": 7956 }, { "epoch": 0.6524622085575199, "grad_norm": 0.4054269790649414, "learning_rate": 2.9557128779979493e-05, "loss": 2.6795, "step": 7958 }, { "epoch": 0.6526261849859083, "grad_norm": 0.3874515891075134, "learning_rate": 2.9532388436235127e-05, "loss": 2.6986, "step": 7960 }, { "epoch": 0.6527901614142967, "grad_norm": 0.35889798402786255, "learning_rate": 2.9507654110943813e-05, "loss": 2.7503, "step": 7962 }, { "epoch": 0.6529541378426851, "grad_norm": 0.38609278202056885, "learning_rate": 2.9482925811378658e-05, "loss": 2.7342, "step": 7964 }, { "epoch": 0.6531181142710736, "grad_norm": 0.38416317105293274, "learning_rate": 2.9458203544810935e-05, "loss": 2.6899, "step": 7966 }, { "epoch": 0.653282090699462, "grad_norm": 0.3454633057117462, "learning_rate": 2.943348731851018e-05, "loss": 2.708, "step": 7968 }, { "epoch": 0.6534460671278504, "grad_norm": 0.3578120768070221, "learning_rate": 2.940877713974414e-05, "loss": 2.7144, "step": 7970 }, { "epoch": 0.6536100435562388, "grad_norm": 0.351194828748703, "learning_rate": 2.938407301577878e-05, "loss": 2.6772, "step": 7972 }, { "epoch": 0.6537740199846273, "grad_norm": 0.37222763895988464, "learning_rate": 2.935937495387827e-05, "loss": 2.7184, "step": 7974 }, { "epoch": 0.6539379964130156, "grad_norm": 0.35520943999290466, "learning_rate": 2.933468296130507e-05, "loss": 2.7234, "step": 7976 }, { "epoch": 0.654101972841404, "grad_norm": 0.37274038791656494, "learning_rate": 2.930999704531976e-05, "loss": 2.7176, "step": 7978 }, { "epoch": 0.6542659492697924, "grad_norm": 0.36820128560066223, "learning_rate": 2.9285317213181173e-05, "loss": 2.6575, "step": 7980 }, { "epoch": 0.6544299256981809, "grad_norm": 0.4004935622215271, "learning_rate": 2.9260643472146387e-05, "loss": 2.7858, "step": 7982 }, { "epoch": 0.6545939021265693, "grad_norm": 0.38279664516448975, "learning_rate": 2.923597582947064e-05, "loss": 2.7268, "step": 7984 }, { "epoch": 0.6547578785549577, "grad_norm": 0.3946802020072937, "learning_rate": 2.9211314292407372e-05, "loss": 2.7099, "step": 7986 }, { "epoch": 0.6549218549833461, "grad_norm": 0.38687431812286377, "learning_rate": 2.9186658868208305e-05, "loss": 2.6559, "step": 7988 }, { "epoch": 0.6550858314117346, "grad_norm": 0.40961745381355286, "learning_rate": 2.916200956412325e-05, "loss": 2.6808, "step": 7990 }, { "epoch": 0.655249807840123, "grad_norm": 0.3674470782279968, "learning_rate": 2.9137366387400307e-05, "loss": 2.7769, "step": 7992 }, { "epoch": 0.6554137842685114, "grad_norm": 0.40013325214385986, "learning_rate": 2.911272934528574e-05, "loss": 2.6931, "step": 7994 }, { "epoch": 0.6555777606968998, "grad_norm": 0.3776344656944275, "learning_rate": 2.908809844502403e-05, "loss": 2.679, "step": 7996 }, { "epoch": 0.6557417371252883, "grad_norm": 0.3463280200958252, "learning_rate": 2.906347369385778e-05, "loss": 2.6805, "step": 7998 }, { "epoch": 0.6559057135536767, "grad_norm": 0.36267954111099243, "learning_rate": 2.9038855099027917e-05, "loss": 2.6864, "step": 8000 }, { "epoch": 0.656069689982065, "grad_norm": 0.36124834418296814, "learning_rate": 2.901424266777343e-05, "loss": 2.7348, "step": 8002 }, { "epoch": 0.6562336664104534, "grad_norm": 0.35316163301467896, "learning_rate": 2.8989636407331554e-05, "loss": 2.6707, "step": 8004 }, { "epoch": 0.6563976428388419, "grad_norm": 0.3646197021007538, "learning_rate": 2.8965036324937722e-05, "loss": 2.7284, "step": 8006 }, { "epoch": 0.6565616192672303, "grad_norm": 0.3676811158657074, "learning_rate": 2.8940442427825503e-05, "loss": 2.7328, "step": 8008 }, { "epoch": 0.6567255956956187, "grad_norm": 0.3920822739601135, "learning_rate": 2.8915854723226688e-05, "loss": 2.7163, "step": 8010 }, { "epoch": 0.6568895721240072, "grad_norm": 0.40281441807746887, "learning_rate": 2.8891273218371228e-05, "loss": 2.6922, "step": 8012 }, { "epoch": 0.6570535485523956, "grad_norm": 0.4011118710041046, "learning_rate": 2.8866697920487283e-05, "loss": 2.7137, "step": 8014 }, { "epoch": 0.657217524980784, "grad_norm": 0.4061340391635895, "learning_rate": 2.8842128836801097e-05, "loss": 2.6876, "step": 8016 }, { "epoch": 0.6573815014091724, "grad_norm": 0.39007076621055603, "learning_rate": 2.881756597453724e-05, "loss": 2.6738, "step": 8018 }, { "epoch": 0.6575454778375609, "grad_norm": 0.418149471282959, "learning_rate": 2.8793009340918298e-05, "loss": 2.724, "step": 8020 }, { "epoch": 0.6577094542659493, "grad_norm": 0.35326430201530457, "learning_rate": 2.876845894316511e-05, "loss": 2.6498, "step": 8022 }, { "epoch": 0.6578734306943377, "grad_norm": 0.41558533906936646, "learning_rate": 2.874391478849666e-05, "loss": 2.7526, "step": 8024 }, { "epoch": 0.6580374071227261, "grad_norm": 0.4501861333847046, "learning_rate": 2.8719376884130133e-05, "loss": 2.6614, "step": 8026 }, { "epoch": 0.6582013835511146, "grad_norm": 0.4039250910282135, "learning_rate": 2.8694845237280788e-05, "loss": 2.7159, "step": 8028 }, { "epoch": 0.658365359979503, "grad_norm": 0.4324546754360199, "learning_rate": 2.867031985516212e-05, "loss": 2.7088, "step": 8030 }, { "epoch": 0.6585293364078914, "grad_norm": 0.43278759717941284, "learning_rate": 2.8645800744985792e-05, "loss": 2.6581, "step": 8032 }, { "epoch": 0.6586933128362797, "grad_norm": 0.37865668535232544, "learning_rate": 2.8621287913961514e-05, "loss": 2.7509, "step": 8034 }, { "epoch": 0.6588572892646682, "grad_norm": 0.38785696029663086, "learning_rate": 2.859678136929731e-05, "loss": 2.7084, "step": 8036 }, { "epoch": 0.6590212656930566, "grad_norm": 0.3775040805339813, "learning_rate": 2.8572281118199225e-05, "loss": 2.7025, "step": 8038 }, { "epoch": 0.659185242121445, "grad_norm": 0.3656921684741974, "learning_rate": 2.854778716787151e-05, "loss": 2.6934, "step": 8040 }, { "epoch": 0.6593492185498334, "grad_norm": 0.4260038733482361, "learning_rate": 2.8523299525516546e-05, "loss": 2.6636, "step": 8042 }, { "epoch": 0.6595131949782219, "grad_norm": 0.38450589776039124, "learning_rate": 2.8498818198334897e-05, "loss": 2.6663, "step": 8044 }, { "epoch": 0.6596771714066103, "grad_norm": 0.370313823223114, "learning_rate": 2.8474343193525206e-05, "loss": 2.6922, "step": 8046 }, { "epoch": 0.6598411478349987, "grad_norm": 0.39956822991371155, "learning_rate": 2.8449874518284298e-05, "loss": 2.6659, "step": 8048 }, { "epoch": 0.6600051242633871, "grad_norm": 0.4072279930114746, "learning_rate": 2.842541217980713e-05, "loss": 2.7305, "step": 8050 }, { "epoch": 0.6601691006917756, "grad_norm": 0.3876146376132965, "learning_rate": 2.8400956185286805e-05, "loss": 2.7112, "step": 8052 }, { "epoch": 0.660333077120164, "grad_norm": 0.3958224058151245, "learning_rate": 2.837650654191456e-05, "loss": 2.7004, "step": 8054 }, { "epoch": 0.6604970535485524, "grad_norm": 0.4068087339401245, "learning_rate": 2.8352063256879724e-05, "loss": 2.7071, "step": 8056 }, { "epoch": 0.6606610299769408, "grad_norm": 0.35557734966278076, "learning_rate": 2.83276263373698e-05, "loss": 2.679, "step": 8058 }, { "epoch": 0.6608250064053293, "grad_norm": 0.36318376660346985, "learning_rate": 2.8303195790570415e-05, "loss": 2.6958, "step": 8060 }, { "epoch": 0.6609889828337177, "grad_norm": 0.3749483525753021, "learning_rate": 2.8278771623665323e-05, "loss": 2.7769, "step": 8062 }, { "epoch": 0.661152959262106, "grad_norm": 0.3773966431617737, "learning_rate": 2.825435384383634e-05, "loss": 2.7026, "step": 8064 }, { "epoch": 0.6613169356904945, "grad_norm": 0.3807618021965027, "learning_rate": 2.8229942458263538e-05, "loss": 2.7566, "step": 8066 }, { "epoch": 0.6614809121188829, "grad_norm": 0.38911864161491394, "learning_rate": 2.8205537474124965e-05, "loss": 2.747, "step": 8068 }, { "epoch": 0.6616448885472713, "grad_norm": 0.36372971534729004, "learning_rate": 2.818113889859687e-05, "loss": 2.6829, "step": 8070 }, { "epoch": 0.6618088649756597, "grad_norm": 0.3598827123641968, "learning_rate": 2.8156746738853613e-05, "loss": 2.7073, "step": 8072 }, { "epoch": 0.6619728414040482, "grad_norm": 0.36627793312072754, "learning_rate": 2.8132361002067613e-05, "loss": 2.6839, "step": 8074 }, { "epoch": 0.6621368178324366, "grad_norm": 0.36478564143180847, "learning_rate": 2.810798169540947e-05, "loss": 2.6743, "step": 8076 }, { "epoch": 0.662300794260825, "grad_norm": 0.34423142671585083, "learning_rate": 2.808360882604784e-05, "loss": 2.6963, "step": 8078 }, { "epoch": 0.6624647706892134, "grad_norm": 0.3476889431476593, "learning_rate": 2.8059242401149544e-05, "loss": 2.6932, "step": 8080 }, { "epoch": 0.6626287471176019, "grad_norm": 0.3661099374294281, "learning_rate": 2.803488242787941e-05, "loss": 2.7451, "step": 8082 }, { "epoch": 0.6627927235459903, "grad_norm": 0.37179726362228394, "learning_rate": 2.801052891340051e-05, "loss": 2.6449, "step": 8084 }, { "epoch": 0.6629566999743787, "grad_norm": 0.3675248324871063, "learning_rate": 2.798618186487387e-05, "loss": 2.7142, "step": 8086 }, { "epoch": 0.6631206764027671, "grad_norm": 0.3806789815425873, "learning_rate": 2.7961841289458713e-05, "loss": 2.6497, "step": 8088 }, { "epoch": 0.6632846528311556, "grad_norm": 0.3967260420322418, "learning_rate": 2.7937507194312323e-05, "loss": 2.7152, "step": 8090 }, { "epoch": 0.663448629259544, "grad_norm": 0.3586444556713104, "learning_rate": 2.7913179586590104e-05, "loss": 2.7196, "step": 8092 }, { "epoch": 0.6636126056879323, "grad_norm": 0.3600021302700043, "learning_rate": 2.788885847344549e-05, "loss": 2.6905, "step": 8094 }, { "epoch": 0.6637765821163207, "grad_norm": 0.40098971128463745, "learning_rate": 2.7864543862030074e-05, "loss": 2.7104, "step": 8096 }, { "epoch": 0.6639405585447092, "grad_norm": 0.4165302813053131, "learning_rate": 2.7840235759493506e-05, "loss": 2.7167, "step": 8098 }, { "epoch": 0.6641045349730976, "grad_norm": 0.39745283126831055, "learning_rate": 2.781593417298352e-05, "loss": 2.7079, "step": 8100 }, { "epoch": 0.664268511401486, "grad_norm": 0.36143335700035095, "learning_rate": 2.7791639109645972e-05, "loss": 2.764, "step": 8102 }, { "epoch": 0.6644324878298744, "grad_norm": 0.36410221457481384, "learning_rate": 2.7767350576624717e-05, "loss": 2.6782, "step": 8104 }, { "epoch": 0.6645964642582629, "grad_norm": 0.40479522943496704, "learning_rate": 2.774306858106177e-05, "loss": 2.7308, "step": 8106 }, { "epoch": 0.6647604406866513, "grad_norm": 0.3978186845779419, "learning_rate": 2.7718793130097187e-05, "loss": 2.6905, "step": 8108 }, { "epoch": 0.6649244171150397, "grad_norm": 0.39073604345321655, "learning_rate": 2.769452423086914e-05, "loss": 2.7269, "step": 8110 }, { "epoch": 0.6650883935434281, "grad_norm": 0.34713536500930786, "learning_rate": 2.7670261890513773e-05, "loss": 2.6669, "step": 8112 }, { "epoch": 0.6652523699718166, "grad_norm": 0.37141019105911255, "learning_rate": 2.764600611616544e-05, "loss": 2.7137, "step": 8114 }, { "epoch": 0.665416346400205, "grad_norm": 0.39449837803840637, "learning_rate": 2.7621756914956455e-05, "loss": 2.7047, "step": 8116 }, { "epoch": 0.6655803228285934, "grad_norm": 0.3795781433582306, "learning_rate": 2.7597514294017245e-05, "loss": 2.6987, "step": 8118 }, { "epoch": 0.6657442992569819, "grad_norm": 0.3567329943180084, "learning_rate": 2.757327826047632e-05, "loss": 2.7415, "step": 8120 }, { "epoch": 0.6659082756853703, "grad_norm": 0.3718640208244324, "learning_rate": 2.7549048821460187e-05, "loss": 2.6568, "step": 8122 }, { "epoch": 0.6660722521137586, "grad_norm": 0.378045916557312, "learning_rate": 2.7524825984093472e-05, "loss": 2.6979, "step": 8124 }, { "epoch": 0.666236228542147, "grad_norm": 0.35238373279571533, "learning_rate": 2.7500609755498857e-05, "loss": 2.7156, "step": 8126 }, { "epoch": 0.6664002049705355, "grad_norm": 0.39142218232154846, "learning_rate": 2.7476400142797077e-05, "loss": 2.7348, "step": 8128 }, { "epoch": 0.6665641813989239, "grad_norm": 0.38561588525772095, "learning_rate": 2.745219715310685e-05, "loss": 2.7155, "step": 8130 }, { "epoch": 0.6667281578273123, "grad_norm": 0.3654249906539917, "learning_rate": 2.742800079354509e-05, "loss": 2.776, "step": 8132 }, { "epoch": 0.6668921342557007, "grad_norm": 0.36338138580322266, "learning_rate": 2.740381107122662e-05, "loss": 2.6734, "step": 8134 }, { "epoch": 0.6670561106840892, "grad_norm": 0.38027507066726685, "learning_rate": 2.7379627993264395e-05, "loss": 2.6938, "step": 8136 }, { "epoch": 0.6672200871124776, "grad_norm": 0.38524648547172546, "learning_rate": 2.7355451566769385e-05, "loss": 2.7491, "step": 8138 }, { "epoch": 0.667384063540866, "grad_norm": 0.37056267261505127, "learning_rate": 2.733128179885064e-05, "loss": 2.7196, "step": 8140 }, { "epoch": 0.6675480399692544, "grad_norm": 0.37558913230895996, "learning_rate": 2.7307118696615174e-05, "loss": 2.7008, "step": 8142 }, { "epoch": 0.6677120163976429, "grad_norm": 0.3649221658706665, "learning_rate": 2.7282962267168122e-05, "loss": 2.7059, "step": 8144 }, { "epoch": 0.6678759928260313, "grad_norm": 0.37081843614578247, "learning_rate": 2.7258812517612632e-05, "loss": 2.6639, "step": 8146 }, { "epoch": 0.6680399692544197, "grad_norm": 0.3784293234348297, "learning_rate": 2.7234669455049834e-05, "loss": 2.7312, "step": 8148 }, { "epoch": 0.668203945682808, "grad_norm": 0.3668130040168762, "learning_rate": 2.7210533086579005e-05, "loss": 2.7087, "step": 8150 }, { "epoch": 0.6683679221111966, "grad_norm": 0.3724210262298584, "learning_rate": 2.718640341929734e-05, "loss": 2.7433, "step": 8152 }, { "epoch": 0.668531898539585, "grad_norm": 0.37186020612716675, "learning_rate": 2.7162280460300127e-05, "loss": 2.7301, "step": 8154 }, { "epoch": 0.6686958749679733, "grad_norm": 0.376110315322876, "learning_rate": 2.7138164216680662e-05, "loss": 2.6791, "step": 8156 }, { "epoch": 0.6688598513963617, "grad_norm": 0.36955568194389343, "learning_rate": 2.711405469553029e-05, "loss": 2.6705, "step": 8158 }, { "epoch": 0.6690238278247502, "grad_norm": 0.3734026551246643, "learning_rate": 2.70899519039383e-05, "loss": 2.6618, "step": 8160 }, { "epoch": 0.6691878042531386, "grad_norm": 0.38206690549850464, "learning_rate": 2.706585584899214e-05, "loss": 2.7474, "step": 8162 }, { "epoch": 0.669351780681527, "grad_norm": 0.40900060534477234, "learning_rate": 2.7041766537777145e-05, "loss": 2.749, "step": 8164 }, { "epoch": 0.6695157571099154, "grad_norm": 0.3761042654514313, "learning_rate": 2.701768397737674e-05, "loss": 2.6532, "step": 8166 }, { "epoch": 0.6696797335383039, "grad_norm": 0.361674964427948, "learning_rate": 2.6993608174872355e-05, "loss": 2.7127, "step": 8168 }, { "epoch": 0.6698437099666923, "grad_norm": 0.37260809540748596, "learning_rate": 2.696953913734339e-05, "loss": 2.6408, "step": 8170 }, { "epoch": 0.6700076863950807, "grad_norm": 0.3890365660190582, "learning_rate": 2.6945476871867302e-05, "loss": 2.6665, "step": 8172 }, { "epoch": 0.6701716628234691, "grad_norm": 0.35239818692207336, "learning_rate": 2.6921421385519562e-05, "loss": 2.6262, "step": 8174 }, { "epoch": 0.6703356392518576, "grad_norm": 0.368145227432251, "learning_rate": 2.6897372685373634e-05, "loss": 2.7065, "step": 8176 }, { "epoch": 0.670499615680246, "grad_norm": 0.37635675072669983, "learning_rate": 2.6873330778500915e-05, "loss": 2.6372, "step": 8178 }, { "epoch": 0.6706635921086344, "grad_norm": 0.3594423830509186, "learning_rate": 2.684929567197097e-05, "loss": 2.757, "step": 8180 }, { "epoch": 0.6708275685370229, "grad_norm": 0.39035844802856445, "learning_rate": 2.6825267372851193e-05, "loss": 2.6989, "step": 8182 }, { "epoch": 0.6709915449654112, "grad_norm": 0.36963799595832825, "learning_rate": 2.6801245888207076e-05, "loss": 2.6732, "step": 8184 }, { "epoch": 0.6711555213937996, "grad_norm": 0.4206686317920685, "learning_rate": 2.6777231225102072e-05, "loss": 2.701, "step": 8186 }, { "epoch": 0.671319497822188, "grad_norm": 0.36891210079193115, "learning_rate": 2.6753223390597666e-05, "loss": 2.7115, "step": 8188 }, { "epoch": 0.6714834742505765, "grad_norm": 0.36788102984428406, "learning_rate": 2.6729222391753255e-05, "loss": 2.6402, "step": 8190 }, { "epoch": 0.6716474506789649, "grad_norm": 0.3817219138145447, "learning_rate": 2.67052282356263e-05, "loss": 2.7658, "step": 8192 }, { "epoch": 0.6718114271073533, "grad_norm": 0.35640057921409607, "learning_rate": 2.6681240929272254e-05, "loss": 2.7745, "step": 8194 }, { "epoch": 0.6719754035357417, "grad_norm": 0.3562081754207611, "learning_rate": 2.6657260479744462e-05, "loss": 2.664, "step": 8196 }, { "epoch": 0.6721393799641302, "grad_norm": 0.37609589099884033, "learning_rate": 2.663328689409439e-05, "loss": 2.6658, "step": 8198 }, { "epoch": 0.6723033563925186, "grad_norm": 0.39180171489715576, "learning_rate": 2.6609320179371367e-05, "loss": 2.708, "step": 8200 }, { "epoch": 0.672467332820907, "grad_norm": 0.36664754152297974, "learning_rate": 2.6585360342622766e-05, "loss": 2.651, "step": 8202 }, { "epoch": 0.6726313092492954, "grad_norm": 0.38205891847610474, "learning_rate": 2.6561407390893916e-05, "loss": 2.692, "step": 8204 }, { "epoch": 0.6727952856776839, "grad_norm": 0.4301818013191223, "learning_rate": 2.6537461331228153e-05, "loss": 2.7361, "step": 8206 }, { "epoch": 0.6729592621060723, "grad_norm": 0.39791837334632874, "learning_rate": 2.6513522170666717e-05, "loss": 2.7151, "step": 8208 }, { "epoch": 0.6731232385344607, "grad_norm": 0.3955062925815582, "learning_rate": 2.648958991624888e-05, "loss": 2.7527, "step": 8210 }, { "epoch": 0.673287214962849, "grad_norm": 0.3791442811489105, "learning_rate": 2.6465664575011868e-05, "loss": 2.7517, "step": 8212 }, { "epoch": 0.6734511913912375, "grad_norm": 0.3849659264087677, "learning_rate": 2.6441746153990865e-05, "loss": 2.6752, "step": 8214 }, { "epoch": 0.6736151678196259, "grad_norm": 0.3792452812194824, "learning_rate": 2.6417834660219054e-05, "loss": 2.6883, "step": 8216 }, { "epoch": 0.6737791442480143, "grad_norm": 0.3837055265903473, "learning_rate": 2.6393930100727515e-05, "loss": 2.6416, "step": 8218 }, { "epoch": 0.6739431206764027, "grad_norm": 0.38943207263946533, "learning_rate": 2.6370032482545337e-05, "loss": 2.7423, "step": 8220 }, { "epoch": 0.6741070971047912, "grad_norm": 0.36709269881248474, "learning_rate": 2.6346141812699572e-05, "loss": 2.7491, "step": 8222 }, { "epoch": 0.6742710735331796, "grad_norm": 0.42113515734672546, "learning_rate": 2.6322258098215224e-05, "loss": 2.6578, "step": 8224 }, { "epoch": 0.674435049961568, "grad_norm": 0.411876380443573, "learning_rate": 2.6298381346115186e-05, "loss": 2.7246, "step": 8226 }, { "epoch": 0.6745990263899564, "grad_norm": 0.4018315076828003, "learning_rate": 2.6274511563420445e-05, "loss": 2.6599, "step": 8228 }, { "epoch": 0.6747630028183449, "grad_norm": 0.41591763496398926, "learning_rate": 2.625064875714979e-05, "loss": 2.699, "step": 8230 }, { "epoch": 0.6749269792467333, "grad_norm": 0.4027625620365143, "learning_rate": 2.6226792934320044e-05, "loss": 2.7217, "step": 8232 }, { "epoch": 0.6750909556751217, "grad_norm": 0.37466877698898315, "learning_rate": 2.6202944101945968e-05, "loss": 2.68, "step": 8234 }, { "epoch": 0.6752549321035102, "grad_norm": 0.38026630878448486, "learning_rate": 2.6179102267040235e-05, "loss": 2.6948, "step": 8236 }, { "epoch": 0.6754189085318986, "grad_norm": 0.39000222086906433, "learning_rate": 2.6155267436613483e-05, "loss": 2.7283, "step": 8238 }, { "epoch": 0.675582884960287, "grad_norm": 0.3974981904029846, "learning_rate": 2.6131439617674303e-05, "loss": 2.6694, "step": 8240 }, { "epoch": 0.6757468613886753, "grad_norm": 0.38007107377052307, "learning_rate": 2.6107618817229212e-05, "loss": 2.6856, "step": 8242 }, { "epoch": 0.6759108378170638, "grad_norm": 0.3747228980064392, "learning_rate": 2.608380504228261e-05, "loss": 2.691, "step": 8244 }, { "epoch": 0.6760748142454522, "grad_norm": 0.3980855941772461, "learning_rate": 2.6059998299836957e-05, "loss": 2.7035, "step": 8246 }, { "epoch": 0.6762387906738406, "grad_norm": 0.37124818563461304, "learning_rate": 2.6036198596892515e-05, "loss": 2.6648, "step": 8248 }, { "epoch": 0.676402767102229, "grad_norm": 0.38610053062438965, "learning_rate": 2.6012405940447548e-05, "loss": 2.7189, "step": 8250 }, { "epoch": 0.6765667435306175, "grad_norm": 0.38325104117393494, "learning_rate": 2.5988620337498233e-05, "loss": 2.7064, "step": 8252 }, { "epoch": 0.6767307199590059, "grad_norm": 0.3818075358867645, "learning_rate": 2.5964841795038685e-05, "loss": 2.7091, "step": 8254 }, { "epoch": 0.6768946963873943, "grad_norm": 0.3741518259048462, "learning_rate": 2.5941070320060895e-05, "loss": 2.6557, "step": 8256 }, { "epoch": 0.6770586728157827, "grad_norm": 0.3564099967479706, "learning_rate": 2.5917305919554824e-05, "loss": 2.632, "step": 8258 }, { "epoch": 0.6772226492441712, "grad_norm": 0.3514571487903595, "learning_rate": 2.589354860050836e-05, "loss": 2.7041, "step": 8260 }, { "epoch": 0.6773866256725596, "grad_norm": 0.38728657364845276, "learning_rate": 2.586979836990723e-05, "loss": 2.7169, "step": 8262 }, { "epoch": 0.677550602100948, "grad_norm": 0.38779333233833313, "learning_rate": 2.584605523473521e-05, "loss": 2.7581, "step": 8264 }, { "epoch": 0.6777145785293364, "grad_norm": 0.38863226771354675, "learning_rate": 2.5822319201973855e-05, "loss": 2.7207, "step": 8266 }, { "epoch": 0.6778785549577249, "grad_norm": 0.38649144768714905, "learning_rate": 2.579859027860271e-05, "loss": 2.6717, "step": 8268 }, { "epoch": 0.6780425313861133, "grad_norm": 0.3792441785335541, "learning_rate": 2.5774868471599202e-05, "loss": 2.7173, "step": 8270 }, { "epoch": 0.6782065078145016, "grad_norm": 0.38163819909095764, "learning_rate": 2.5751153787938697e-05, "loss": 2.658, "step": 8272 }, { "epoch": 0.67837048424289, "grad_norm": 0.3687981069087982, "learning_rate": 2.5727446234594387e-05, "loss": 2.7493, "step": 8274 }, { "epoch": 0.6785344606712785, "grad_norm": 0.39426374435424805, "learning_rate": 2.57037458185375e-05, "loss": 2.725, "step": 8276 }, { "epoch": 0.6786984370996669, "grad_norm": 0.36612024903297424, "learning_rate": 2.5680052546737027e-05, "loss": 2.709, "step": 8278 }, { "epoch": 0.6788624135280553, "grad_norm": 0.35995176434516907, "learning_rate": 2.565636642615994e-05, "loss": 2.7274, "step": 8280 }, { "epoch": 0.6790263899564437, "grad_norm": 0.3847874104976654, "learning_rate": 2.56326874637711e-05, "loss": 2.6745, "step": 8282 }, { "epoch": 0.6791903663848322, "grad_norm": 0.37445297837257385, "learning_rate": 2.5609015666533214e-05, "loss": 2.6481, "step": 8284 }, { "epoch": 0.6793543428132206, "grad_norm": 0.3875288963317871, "learning_rate": 2.558535104140694e-05, "loss": 2.7548, "step": 8286 }, { "epoch": 0.679518319241609, "grad_norm": 0.37544742226600647, "learning_rate": 2.5561693595350812e-05, "loss": 2.7183, "step": 8288 }, { "epoch": 0.6796822956699974, "grad_norm": 0.39676231145858765, "learning_rate": 2.5538043335321248e-05, "loss": 2.7058, "step": 8290 }, { "epoch": 0.6798462720983859, "grad_norm": 0.39057162404060364, "learning_rate": 2.5514400268272513e-05, "loss": 2.7093, "step": 8292 }, { "epoch": 0.6800102485267743, "grad_norm": 0.3795698285102844, "learning_rate": 2.5490764401156862e-05, "loss": 2.6772, "step": 8294 }, { "epoch": 0.6801742249551627, "grad_norm": 0.3786967992782593, "learning_rate": 2.5467135740924308e-05, "loss": 2.7112, "step": 8296 }, { "epoch": 0.6803382013835512, "grad_norm": 0.3857060670852661, "learning_rate": 2.544351429452283e-05, "loss": 2.6722, "step": 8298 }, { "epoch": 0.6805021778119396, "grad_norm": 0.36618056893348694, "learning_rate": 2.541990006889825e-05, "loss": 2.6766, "step": 8300 }, { "epoch": 0.680666154240328, "grad_norm": 0.39972805976867676, "learning_rate": 2.5396293070994297e-05, "loss": 2.7417, "step": 8302 }, { "epoch": 0.6808301306687163, "grad_norm": 0.40740692615509033, "learning_rate": 2.5372693307752514e-05, "loss": 2.6673, "step": 8304 }, { "epoch": 0.6809941070971048, "grad_norm": 0.39190182089805603, "learning_rate": 2.5349100786112377e-05, "loss": 2.6808, "step": 8306 }, { "epoch": 0.6811580835254932, "grad_norm": 0.4014313220977783, "learning_rate": 2.532551551301123e-05, "loss": 2.7375, "step": 8308 }, { "epoch": 0.6813220599538816, "grad_norm": 0.3573625087738037, "learning_rate": 2.530193749538422e-05, "loss": 2.6694, "step": 8310 }, { "epoch": 0.68148603638227, "grad_norm": 0.3863253891468048, "learning_rate": 2.5278366740164466e-05, "loss": 2.6942, "step": 8312 }, { "epoch": 0.6816500128106585, "grad_norm": 0.38585901260375977, "learning_rate": 2.5254803254282844e-05, "loss": 2.6705, "step": 8314 }, { "epoch": 0.6818139892390469, "grad_norm": 0.3723987936973572, "learning_rate": 2.5231247044668166e-05, "loss": 2.7009, "step": 8316 }, { "epoch": 0.6819779656674353, "grad_norm": 0.4075051546096802, "learning_rate": 2.520769811824707e-05, "loss": 2.6906, "step": 8318 }, { "epoch": 0.6821419420958237, "grad_norm": 0.38526657223701477, "learning_rate": 2.5184156481944086e-05, "loss": 2.6751, "step": 8320 }, { "epoch": 0.6823059185242122, "grad_norm": 0.36364448070526123, "learning_rate": 2.5160622142681546e-05, "loss": 2.6927, "step": 8322 }, { "epoch": 0.6824698949526006, "grad_norm": 0.370485782623291, "learning_rate": 2.513709510737967e-05, "loss": 2.6772, "step": 8324 }, { "epoch": 0.682633871380989, "grad_norm": 0.3559088408946991, "learning_rate": 2.5113575382956546e-05, "loss": 2.6345, "step": 8326 }, { "epoch": 0.6827978478093774, "grad_norm": 0.40118610858917236, "learning_rate": 2.5090062976328076e-05, "loss": 2.7016, "step": 8328 }, { "epoch": 0.6829618242377659, "grad_norm": 0.37945353984832764, "learning_rate": 2.506655789440806e-05, "loss": 2.6757, "step": 8330 }, { "epoch": 0.6831258006661542, "grad_norm": 0.3832918703556061, "learning_rate": 2.5043060144108078e-05, "loss": 2.7603, "step": 8332 }, { "epoch": 0.6832897770945426, "grad_norm": 0.3587692677974701, "learning_rate": 2.501956973233759e-05, "loss": 2.7212, "step": 8334 }, { "epoch": 0.683453753522931, "grad_norm": 0.3815983235836029, "learning_rate": 2.4996086666003898e-05, "loss": 2.7249, "step": 8336 }, { "epoch": 0.6836177299513195, "grad_norm": 0.3645575940608978, "learning_rate": 2.497261095201218e-05, "loss": 2.721, "step": 8338 }, { "epoch": 0.6837817063797079, "grad_norm": 0.38928934931755066, "learning_rate": 2.494914259726534e-05, "loss": 2.6733, "step": 8340 }, { "epoch": 0.6839456828080963, "grad_norm": 0.37325677275657654, "learning_rate": 2.492568160866428e-05, "loss": 2.7085, "step": 8342 }, { "epoch": 0.6841096592364847, "grad_norm": 0.3798007369041443, "learning_rate": 2.490222799310758e-05, "loss": 2.658, "step": 8344 }, { "epoch": 0.6842736356648732, "grad_norm": 0.3864426612854004, "learning_rate": 2.4878781757491754e-05, "loss": 2.7088, "step": 8346 }, { "epoch": 0.6844376120932616, "grad_norm": 0.3895781338214874, "learning_rate": 2.4855342908711116e-05, "loss": 2.6388, "step": 8348 }, { "epoch": 0.68460158852165, "grad_norm": 0.3879987299442291, "learning_rate": 2.4831911453657774e-05, "loss": 2.7558, "step": 8350 }, { "epoch": 0.6847655649500385, "grad_norm": 0.3584182560443878, "learning_rate": 2.4808487399221715e-05, "loss": 2.701, "step": 8352 }, { "epoch": 0.6849295413784269, "grad_norm": 0.3610781133174896, "learning_rate": 2.4785070752290722e-05, "loss": 2.7193, "step": 8354 }, { "epoch": 0.6850935178068153, "grad_norm": 0.40857601165771484, "learning_rate": 2.476166151975042e-05, "loss": 2.698, "step": 8356 }, { "epoch": 0.6852574942352037, "grad_norm": 0.3872397840023041, "learning_rate": 2.4738259708484196e-05, "loss": 2.7307, "step": 8358 }, { "epoch": 0.6854214706635922, "grad_norm": 0.36924323439598083, "learning_rate": 2.471486532537336e-05, "loss": 2.6702, "step": 8360 }, { "epoch": 0.6855854470919805, "grad_norm": 0.3698207437992096, "learning_rate": 2.4691478377296924e-05, "loss": 2.6859, "step": 8362 }, { "epoch": 0.6857494235203689, "grad_norm": 0.3924945592880249, "learning_rate": 2.466809887113178e-05, "loss": 2.6889, "step": 8364 }, { "epoch": 0.6859133999487573, "grad_norm": 0.399661660194397, "learning_rate": 2.4644726813752622e-05, "loss": 2.6925, "step": 8366 }, { "epoch": 0.6860773763771458, "grad_norm": 0.3852911591529846, "learning_rate": 2.462136221203196e-05, "loss": 2.6954, "step": 8368 }, { "epoch": 0.6862413528055342, "grad_norm": 0.39803236722946167, "learning_rate": 2.4598005072840074e-05, "loss": 2.7448, "step": 8370 }, { "epoch": 0.6864053292339226, "grad_norm": 0.39910784363746643, "learning_rate": 2.4574655403045087e-05, "loss": 2.6747, "step": 8372 }, { "epoch": 0.686569305662311, "grad_norm": 0.3956886827945709, "learning_rate": 2.4551313209512917e-05, "loss": 2.704, "step": 8374 }, { "epoch": 0.6867332820906995, "grad_norm": 0.39390331506729126, "learning_rate": 2.452797849910728e-05, "loss": 2.6991, "step": 8376 }, { "epoch": 0.6868972585190879, "grad_norm": 0.3898649513721466, "learning_rate": 2.450465127868971e-05, "loss": 2.6889, "step": 8378 }, { "epoch": 0.6870612349474763, "grad_norm": 0.3891480565071106, "learning_rate": 2.4481331555119485e-05, "loss": 2.6686, "step": 8380 }, { "epoch": 0.6872252113758647, "grad_norm": 0.3815426826477051, "learning_rate": 2.4458019335253734e-05, "loss": 2.7233, "step": 8382 }, { "epoch": 0.6873891878042532, "grad_norm": 0.406530499458313, "learning_rate": 2.4434714625947363e-05, "loss": 2.7453, "step": 8384 }, { "epoch": 0.6875531642326416, "grad_norm": 0.3807404041290283, "learning_rate": 2.441141743405308e-05, "loss": 2.6957, "step": 8386 }, { "epoch": 0.68771714066103, "grad_norm": 0.3976084589958191, "learning_rate": 2.438812776642132e-05, "loss": 2.69, "step": 8388 }, { "epoch": 0.6878811170894183, "grad_norm": 0.38501620292663574, "learning_rate": 2.4364845629900423e-05, "loss": 2.6682, "step": 8390 }, { "epoch": 0.6880450935178068, "grad_norm": 0.4026597738265991, "learning_rate": 2.4341571031336395e-05, "loss": 2.7205, "step": 8392 }, { "epoch": 0.6882090699461952, "grad_norm": 0.3994022607803345, "learning_rate": 2.4318303977573097e-05, "loss": 2.7379, "step": 8394 }, { "epoch": 0.6883730463745836, "grad_norm": 0.40519949793815613, "learning_rate": 2.4295044475452167e-05, "loss": 2.6898, "step": 8396 }, { "epoch": 0.688537022802972, "grad_norm": 0.36324363946914673, "learning_rate": 2.427179253181297e-05, "loss": 2.7469, "step": 8398 }, { "epoch": 0.6887009992313605, "grad_norm": 0.38369739055633545, "learning_rate": 2.4248548153492702e-05, "loss": 2.677, "step": 8400 }, { "epoch": 0.6888649756597489, "grad_norm": 0.4025323987007141, "learning_rate": 2.4225311347326317e-05, "loss": 2.7053, "step": 8402 }, { "epoch": 0.6890289520881373, "grad_norm": 0.3958849608898163, "learning_rate": 2.4202082120146573e-05, "loss": 2.7344, "step": 8404 }, { "epoch": 0.6891929285165258, "grad_norm": 0.37935763597488403, "learning_rate": 2.4178860478783903e-05, "loss": 2.6673, "step": 8406 }, { "epoch": 0.6893569049449142, "grad_norm": 0.40843576192855835, "learning_rate": 2.4155646430066653e-05, "loss": 2.7021, "step": 8408 }, { "epoch": 0.6895208813733026, "grad_norm": 0.43301036953926086, "learning_rate": 2.4132439980820805e-05, "loss": 2.7339, "step": 8410 }, { "epoch": 0.689684857801691, "grad_norm": 0.3878119885921478, "learning_rate": 2.4109241137870176e-05, "loss": 2.7106, "step": 8412 }, { "epoch": 0.6898488342300795, "grad_norm": 0.3935549855232239, "learning_rate": 2.4086049908036335e-05, "loss": 2.7239, "step": 8414 }, { "epoch": 0.6900128106584679, "grad_norm": 0.3892782926559448, "learning_rate": 2.4062866298138626e-05, "loss": 2.7227, "step": 8416 }, { "epoch": 0.6901767870868563, "grad_norm": 0.40126466751098633, "learning_rate": 2.4039690314994096e-05, "loss": 2.6684, "step": 8418 }, { "epoch": 0.6903407635152446, "grad_norm": 0.40310603380203247, "learning_rate": 2.401652196541761e-05, "loss": 2.7113, "step": 8420 }, { "epoch": 0.6905047399436332, "grad_norm": 0.3945203423500061, "learning_rate": 2.3993361256221784e-05, "loss": 2.7171, "step": 8422 }, { "epoch": 0.6906687163720215, "grad_norm": 0.39180508255958557, "learning_rate": 2.3970208194216914e-05, "loss": 2.7298, "step": 8424 }, { "epoch": 0.6908326928004099, "grad_norm": 0.38912150263786316, "learning_rate": 2.3947062786211183e-05, "loss": 2.6408, "step": 8426 }, { "epoch": 0.6909966692287983, "grad_norm": 0.37896761298179626, "learning_rate": 2.3923925039010375e-05, "loss": 2.7209, "step": 8428 }, { "epoch": 0.6911606456571868, "grad_norm": 0.38243967294692993, "learning_rate": 2.3900794959418123e-05, "loss": 2.6873, "step": 8430 }, { "epoch": 0.6913246220855752, "grad_norm": 0.3906749486923218, "learning_rate": 2.3877672554235765e-05, "loss": 2.6535, "step": 8432 }, { "epoch": 0.6914885985139636, "grad_norm": 0.3702877461910248, "learning_rate": 2.385455783026241e-05, "loss": 2.6838, "step": 8434 }, { "epoch": 0.691652574942352, "grad_norm": 0.42546260356903076, "learning_rate": 2.383145079429483e-05, "loss": 2.7067, "step": 8436 }, { "epoch": 0.6918165513707405, "grad_norm": 0.38497427105903625, "learning_rate": 2.380835145312767e-05, "loss": 2.6411, "step": 8438 }, { "epoch": 0.6919805277991289, "grad_norm": 0.3927631378173828, "learning_rate": 2.3785259813553186e-05, "loss": 2.66, "step": 8440 }, { "epoch": 0.6921445042275173, "grad_norm": 0.40803998708724976, "learning_rate": 2.3762175882361437e-05, "loss": 2.7613, "step": 8442 }, { "epoch": 0.6923084806559057, "grad_norm": 0.3995896875858307, "learning_rate": 2.3739099666340215e-05, "loss": 2.6944, "step": 8444 }, { "epoch": 0.6924724570842942, "grad_norm": 0.39971452951431274, "learning_rate": 2.3716031172274988e-05, "loss": 2.6926, "step": 8446 }, { "epoch": 0.6926364335126826, "grad_norm": 0.3683062195777893, "learning_rate": 2.3692970406949017e-05, "loss": 2.7307, "step": 8448 }, { "epoch": 0.692800409941071, "grad_norm": 0.38081446290016174, "learning_rate": 2.3669917377143258e-05, "loss": 2.7179, "step": 8450 }, { "epoch": 0.6929643863694593, "grad_norm": 0.39257097244262695, "learning_rate": 2.364687208963643e-05, "loss": 2.7109, "step": 8452 }, { "epoch": 0.6931283627978478, "grad_norm": 0.38061755895614624, "learning_rate": 2.3623834551204876e-05, "loss": 2.7346, "step": 8454 }, { "epoch": 0.6932923392262362, "grad_norm": 0.3543435037136078, "learning_rate": 2.360080476862282e-05, "loss": 2.7067, "step": 8456 }, { "epoch": 0.6934563156546246, "grad_norm": 0.3647612929344177, "learning_rate": 2.357778274866204e-05, "loss": 2.6797, "step": 8458 }, { "epoch": 0.693620292083013, "grad_norm": 0.36694076657295227, "learning_rate": 2.355476849809215e-05, "loss": 2.7077, "step": 8460 }, { "epoch": 0.6937842685114015, "grad_norm": 0.35562363266944885, "learning_rate": 2.3531762023680415e-05, "loss": 2.7323, "step": 8462 }, { "epoch": 0.6939482449397899, "grad_norm": 0.34998953342437744, "learning_rate": 2.3508763332191864e-05, "loss": 2.749, "step": 8464 }, { "epoch": 0.6941122213681783, "grad_norm": 0.3599771559238434, "learning_rate": 2.3485772430389168e-05, "loss": 2.6812, "step": 8466 }, { "epoch": 0.6942761977965668, "grad_norm": 0.3789721429347992, "learning_rate": 2.3462789325032764e-05, "loss": 2.6888, "step": 8468 }, { "epoch": 0.6944401742249552, "grad_norm": 0.3616909086704254, "learning_rate": 2.3439814022880803e-05, "loss": 2.7342, "step": 8470 }, { "epoch": 0.6946041506533436, "grad_norm": 0.376828134059906, "learning_rate": 2.3416846530689064e-05, "loss": 2.683, "step": 8472 }, { "epoch": 0.694768127081732, "grad_norm": 0.37655776739120483, "learning_rate": 2.339388685521115e-05, "loss": 2.6618, "step": 8474 }, { "epoch": 0.6949321035101205, "grad_norm": 0.3913043141365051, "learning_rate": 2.3370935003198253e-05, "loss": 2.7671, "step": 8476 }, { "epoch": 0.6950960799385089, "grad_norm": 0.3889150023460388, "learning_rate": 2.3347990981399325e-05, "loss": 2.7254, "step": 8478 }, { "epoch": 0.6952600563668972, "grad_norm": 0.3825925588607788, "learning_rate": 2.3325054796561007e-05, "loss": 2.7199, "step": 8480 }, { "epoch": 0.6954240327952856, "grad_norm": 0.3712370991706848, "learning_rate": 2.3302126455427643e-05, "loss": 2.6727, "step": 8482 }, { "epoch": 0.6955880092236741, "grad_norm": 0.3657374680042267, "learning_rate": 2.327920596474122e-05, "loss": 2.7316, "step": 8484 }, { "epoch": 0.6957519856520625, "grad_norm": 0.36833930015563965, "learning_rate": 2.3256293331241474e-05, "loss": 2.7217, "step": 8486 }, { "epoch": 0.6959159620804509, "grad_norm": 0.37160244584083557, "learning_rate": 2.3233388561665813e-05, "loss": 2.7181, "step": 8488 }, { "epoch": 0.6960799385088393, "grad_norm": 0.38849589228630066, "learning_rate": 2.3210491662749335e-05, "loss": 2.7136, "step": 8490 }, { "epoch": 0.6962439149372278, "grad_norm": 0.3978149890899658, "learning_rate": 2.3187602641224826e-05, "loss": 2.6372, "step": 8492 }, { "epoch": 0.6964078913656162, "grad_norm": 0.385545551776886, "learning_rate": 2.316472150382272e-05, "loss": 2.658, "step": 8494 }, { "epoch": 0.6965718677940046, "grad_norm": 0.3721058666706085, "learning_rate": 2.314184825727118e-05, "loss": 2.657, "step": 8496 }, { "epoch": 0.696735844222393, "grad_norm": 0.36420297622680664, "learning_rate": 2.3118982908296032e-05, "loss": 2.7071, "step": 8498 }, { "epoch": 0.6968998206507815, "grad_norm": 0.3748766779899597, "learning_rate": 2.3096125463620793e-05, "loss": 2.7168, "step": 8500 }, { "epoch": 0.6970637970791699, "grad_norm": 0.3913448452949524, "learning_rate": 2.3073275929966587e-05, "loss": 2.6669, "step": 8502 }, { "epoch": 0.6972277735075583, "grad_norm": 0.4111196994781494, "learning_rate": 2.3050434314052337e-05, "loss": 2.7442, "step": 8504 }, { "epoch": 0.6973917499359467, "grad_norm": 0.3807835280895233, "learning_rate": 2.3027600622594515e-05, "loss": 2.69, "step": 8506 }, { "epoch": 0.6975557263643352, "grad_norm": 0.37266266345977783, "learning_rate": 2.3004774862307326e-05, "loss": 2.7033, "step": 8508 }, { "epoch": 0.6977197027927236, "grad_norm": 0.3839258551597595, "learning_rate": 2.298195703990266e-05, "loss": 2.7077, "step": 8510 }, { "epoch": 0.6978836792211119, "grad_norm": 0.3883345127105713, "learning_rate": 2.295914716209e-05, "loss": 2.6701, "step": 8512 }, { "epoch": 0.6980476556495003, "grad_norm": 0.3678521513938904, "learning_rate": 2.293634523557655e-05, "loss": 2.7117, "step": 8514 }, { "epoch": 0.6982116320778888, "grad_norm": 0.37389418482780457, "learning_rate": 2.291355126706718e-05, "loss": 2.7516, "step": 8516 }, { "epoch": 0.6983756085062772, "grad_norm": 0.3578051030635834, "learning_rate": 2.2890765263264408e-05, "loss": 2.6889, "step": 8518 }, { "epoch": 0.6985395849346656, "grad_norm": 0.3696204125881195, "learning_rate": 2.286798723086835e-05, "loss": 2.6647, "step": 8520 }, { "epoch": 0.6987035613630541, "grad_norm": 0.36636796593666077, "learning_rate": 2.2845217176576912e-05, "loss": 2.6858, "step": 8522 }, { "epoch": 0.6988675377914425, "grad_norm": 0.39986589550971985, "learning_rate": 2.2822455107085518e-05, "loss": 2.695, "step": 8524 }, { "epoch": 0.6990315142198309, "grad_norm": 0.3809782862663269, "learning_rate": 2.2799701029087312e-05, "loss": 2.6948, "step": 8526 }, { "epoch": 0.6991954906482193, "grad_norm": 0.39051175117492676, "learning_rate": 2.277695494927309e-05, "loss": 2.7204, "step": 8528 }, { "epoch": 0.6993594670766078, "grad_norm": 0.36137253046035767, "learning_rate": 2.275421687433129e-05, "loss": 2.6864, "step": 8530 }, { "epoch": 0.6995234435049962, "grad_norm": 0.3832628130912781, "learning_rate": 2.273148681094796e-05, "loss": 2.7347, "step": 8532 }, { "epoch": 0.6996874199333846, "grad_norm": 0.3860900402069092, "learning_rate": 2.2708764765806838e-05, "loss": 2.7036, "step": 8534 }, { "epoch": 0.699851396361773, "grad_norm": 0.37296178936958313, "learning_rate": 2.268605074558931e-05, "loss": 2.698, "step": 8536 }, { "epoch": 0.7000153727901615, "grad_norm": 0.3842230439186096, "learning_rate": 2.2663344756974318e-05, "loss": 2.679, "step": 8538 }, { "epoch": 0.7001793492185499, "grad_norm": 0.376590371131897, "learning_rate": 2.2640646806638584e-05, "loss": 2.7453, "step": 8540 }, { "epoch": 0.7003433256469382, "grad_norm": 0.4014396369457245, "learning_rate": 2.2617956901256326e-05, "loss": 2.6497, "step": 8542 }, { "epoch": 0.7005073020753266, "grad_norm": 0.3732002377510071, "learning_rate": 2.2595275047499482e-05, "loss": 2.7022, "step": 8544 }, { "epoch": 0.7006712785037151, "grad_norm": 0.3643403649330139, "learning_rate": 2.2572601252037595e-05, "loss": 2.7281, "step": 8546 }, { "epoch": 0.7008352549321035, "grad_norm": 0.3596980571746826, "learning_rate": 2.254993552153786e-05, "loss": 2.6838, "step": 8548 }, { "epoch": 0.7009992313604919, "grad_norm": 0.35688257217407227, "learning_rate": 2.252727786266502e-05, "loss": 2.6995, "step": 8550 }, { "epoch": 0.7011632077888803, "grad_norm": 0.3599395453929901, "learning_rate": 2.250462828208159e-05, "loss": 2.6761, "step": 8552 }, { "epoch": 0.7013271842172688, "grad_norm": 0.3549034297466278, "learning_rate": 2.2481986786447568e-05, "loss": 2.6499, "step": 8554 }, { "epoch": 0.7014911606456572, "grad_norm": 0.36290597915649414, "learning_rate": 2.245935338242065e-05, "loss": 2.7377, "step": 8556 }, { "epoch": 0.7016551370740456, "grad_norm": 0.38815751671791077, "learning_rate": 2.2436728076656155e-05, "loss": 2.6821, "step": 8558 }, { "epoch": 0.701819113502434, "grad_norm": 0.3571638762950897, "learning_rate": 2.2414110875806958e-05, "loss": 2.7128, "step": 8560 }, { "epoch": 0.7019830899308225, "grad_norm": 0.36378180980682373, "learning_rate": 2.239150178652362e-05, "loss": 2.6604, "step": 8562 }, { "epoch": 0.7021470663592109, "grad_norm": 0.3422406315803528, "learning_rate": 2.2368900815454287e-05, "loss": 2.7672, "step": 8564 }, { "epoch": 0.7023110427875993, "grad_norm": 0.3636515438556671, "learning_rate": 2.234630796924474e-05, "loss": 2.6835, "step": 8566 }, { "epoch": 0.7024750192159877, "grad_norm": 0.35900112986564636, "learning_rate": 2.2323723254538297e-05, "loss": 2.7209, "step": 8568 }, { "epoch": 0.7026389956443762, "grad_norm": 0.3729889988899231, "learning_rate": 2.2301146677976015e-05, "loss": 2.6812, "step": 8570 }, { "epoch": 0.7028029720727645, "grad_norm": 0.3816934823989868, "learning_rate": 2.2278578246196425e-05, "loss": 2.6851, "step": 8572 }, { "epoch": 0.7029669485011529, "grad_norm": 0.3625105321407318, "learning_rate": 2.2256017965835747e-05, "loss": 2.6295, "step": 8574 }, { "epoch": 0.7031309249295413, "grad_norm": 0.36865508556365967, "learning_rate": 2.2233465843527766e-05, "loss": 2.7283, "step": 8576 }, { "epoch": 0.7032949013579298, "grad_norm": 0.37290316820144653, "learning_rate": 2.221092188590391e-05, "loss": 2.7066, "step": 8578 }, { "epoch": 0.7034588777863182, "grad_norm": 0.36878013610839844, "learning_rate": 2.218838609959314e-05, "loss": 2.6875, "step": 8580 }, { "epoch": 0.7036228542147066, "grad_norm": 0.3616272211074829, "learning_rate": 2.216585849122206e-05, "loss": 2.6473, "step": 8582 }, { "epoch": 0.7037868306430951, "grad_norm": 0.3588869869709015, "learning_rate": 2.2143339067414887e-05, "loss": 2.6438, "step": 8584 }, { "epoch": 0.7039508070714835, "grad_norm": 0.3697204291820526, "learning_rate": 2.2120827834793345e-05, "loss": 2.682, "step": 8586 }, { "epoch": 0.7041147834998719, "grad_norm": 0.3795563578605652, "learning_rate": 2.2098324799976883e-05, "loss": 2.6897, "step": 8588 }, { "epoch": 0.7042787599282603, "grad_norm": 0.39331692457199097, "learning_rate": 2.207582996958242e-05, "loss": 2.6888, "step": 8590 }, { "epoch": 0.7044427363566488, "grad_norm": 0.39076825976371765, "learning_rate": 2.205334335022451e-05, "loss": 2.7245, "step": 8592 }, { "epoch": 0.7046067127850372, "grad_norm": 0.36164453625679016, "learning_rate": 2.20308649485153e-05, "loss": 2.7087, "step": 8594 }, { "epoch": 0.7047706892134256, "grad_norm": 0.36285480856895447, "learning_rate": 2.200839477106453e-05, "loss": 2.7544, "step": 8596 }, { "epoch": 0.704934665641814, "grad_norm": 0.40612390637397766, "learning_rate": 2.198593282447946e-05, "loss": 2.701, "step": 8598 }, { "epoch": 0.7050986420702025, "grad_norm": 0.3629796802997589, "learning_rate": 2.1963479115365e-05, "loss": 2.7245, "step": 8600 }, { "epoch": 0.7052626184985908, "grad_norm": 0.4071548581123352, "learning_rate": 2.1941033650323605e-05, "loss": 2.6904, "step": 8602 }, { "epoch": 0.7054265949269792, "grad_norm": 0.41032174229621887, "learning_rate": 2.1918596435955315e-05, "loss": 2.7452, "step": 8604 }, { "epoch": 0.7055905713553676, "grad_norm": 0.35707178711891174, "learning_rate": 2.189616747885775e-05, "loss": 2.6882, "step": 8606 }, { "epoch": 0.7057545477837561, "grad_norm": 0.37731117010116577, "learning_rate": 2.1873746785626063e-05, "loss": 2.6776, "step": 8608 }, { "epoch": 0.7059185242121445, "grad_norm": 0.3739245533943176, "learning_rate": 2.1851334362853023e-05, "loss": 2.7326, "step": 8610 }, { "epoch": 0.7060825006405329, "grad_norm": 0.3722072243690491, "learning_rate": 2.1828930217128947e-05, "loss": 2.7326, "step": 8612 }, { "epoch": 0.7062464770689213, "grad_norm": 0.37407660484313965, "learning_rate": 2.1806534355041746e-05, "loss": 2.6822, "step": 8614 }, { "epoch": 0.7064104534973098, "grad_norm": 0.3598092794418335, "learning_rate": 2.1784146783176807e-05, "loss": 2.6872, "step": 8616 }, { "epoch": 0.7065744299256982, "grad_norm": 0.34601491689682007, "learning_rate": 2.1761767508117225e-05, "loss": 2.6579, "step": 8618 }, { "epoch": 0.7067384063540866, "grad_norm": 0.3648231625556946, "learning_rate": 2.1739396536443512e-05, "loss": 2.7005, "step": 8620 }, { "epoch": 0.706902382782475, "grad_norm": 0.3847854733467102, "learning_rate": 2.1717033874733827e-05, "loss": 2.6606, "step": 8622 }, { "epoch": 0.7070663592108635, "grad_norm": 0.37499886751174927, "learning_rate": 2.169467952956386e-05, "loss": 2.6549, "step": 8624 }, { "epoch": 0.7072303356392519, "grad_norm": 0.37262287735939026, "learning_rate": 2.1672333507506837e-05, "loss": 2.7011, "step": 8626 }, { "epoch": 0.7073943120676403, "grad_norm": 0.3709699213504791, "learning_rate": 2.1649995815133555e-05, "loss": 2.6961, "step": 8628 }, { "epoch": 0.7075582884960286, "grad_norm": 0.3597058951854706, "learning_rate": 2.1627666459012365e-05, "loss": 2.6823, "step": 8630 }, { "epoch": 0.7077222649244171, "grad_norm": 0.37352320551872253, "learning_rate": 2.1605345445709185e-05, "loss": 2.6511, "step": 8632 }, { "epoch": 0.7078862413528055, "grad_norm": 0.357758492231369, "learning_rate": 2.1583032781787393e-05, "loss": 2.6536, "step": 8634 }, { "epoch": 0.7080502177811939, "grad_norm": 0.36342495679855347, "learning_rate": 2.156072847380806e-05, "loss": 2.6883, "step": 8636 }, { "epoch": 0.7082141942095824, "grad_norm": 0.36827418208122253, "learning_rate": 2.153843252832965e-05, "loss": 2.6701, "step": 8638 }, { "epoch": 0.7083781706379708, "grad_norm": 0.35471510887145996, "learning_rate": 2.1516144951908256e-05, "loss": 2.6893, "step": 8640 }, { "epoch": 0.7085421470663592, "grad_norm": 0.3700093924999237, "learning_rate": 2.1493865751097486e-05, "loss": 2.6763, "step": 8642 }, { "epoch": 0.7087061234947476, "grad_norm": 0.3778194189071655, "learning_rate": 2.147159493244851e-05, "loss": 2.7109, "step": 8644 }, { "epoch": 0.7088700999231361, "grad_norm": 0.36272796988487244, "learning_rate": 2.1449332502509966e-05, "loss": 2.6617, "step": 8646 }, { "epoch": 0.7090340763515245, "grad_norm": 0.34045878052711487, "learning_rate": 2.1427078467828094e-05, "loss": 2.6436, "step": 8648 }, { "epoch": 0.7091980527799129, "grad_norm": 0.3516055941581726, "learning_rate": 2.1404832834946647e-05, "loss": 2.7442, "step": 8650 }, { "epoch": 0.7093620292083013, "grad_norm": 0.3637882471084595, "learning_rate": 2.1382595610406864e-05, "loss": 2.699, "step": 8652 }, { "epoch": 0.7095260056366898, "grad_norm": 0.38303273916244507, "learning_rate": 2.136036680074761e-05, "loss": 2.731, "step": 8654 }, { "epoch": 0.7096899820650782, "grad_norm": 0.34832051396369934, "learning_rate": 2.133814641250516e-05, "loss": 2.6747, "step": 8656 }, { "epoch": 0.7098539584934666, "grad_norm": 0.35098278522491455, "learning_rate": 2.1315934452213387e-05, "loss": 2.6564, "step": 8658 }, { "epoch": 0.7100179349218549, "grad_norm": 0.3803635835647583, "learning_rate": 2.1293730926403673e-05, "loss": 2.7287, "step": 8660 }, { "epoch": 0.7101819113502434, "grad_norm": 0.38908708095550537, "learning_rate": 2.1271535841604913e-05, "loss": 2.6782, "step": 8662 }, { "epoch": 0.7103458877786318, "grad_norm": 0.36467596888542175, "learning_rate": 2.1249349204343484e-05, "loss": 2.7013, "step": 8664 }, { "epoch": 0.7105098642070202, "grad_norm": 0.3772580623626709, "learning_rate": 2.1227171021143372e-05, "loss": 2.6869, "step": 8666 }, { "epoch": 0.7106738406354086, "grad_norm": 0.35389816761016846, "learning_rate": 2.120500129852597e-05, "loss": 2.7237, "step": 8668 }, { "epoch": 0.7108378170637971, "grad_norm": 0.37267571687698364, "learning_rate": 2.118284004301026e-05, "loss": 2.6979, "step": 8670 }, { "epoch": 0.7110017934921855, "grad_norm": 0.35404422879219055, "learning_rate": 2.1160687261112722e-05, "loss": 2.713, "step": 8672 }, { "epoch": 0.7111657699205739, "grad_norm": 0.3572290539741516, "learning_rate": 2.1138542959347262e-05, "loss": 2.7269, "step": 8674 }, { "epoch": 0.7113297463489623, "grad_norm": 0.4067935645580292, "learning_rate": 2.1116407144225448e-05, "loss": 2.7234, "step": 8676 }, { "epoch": 0.7114937227773508, "grad_norm": 0.3761785328388214, "learning_rate": 2.1094279822256195e-05, "loss": 2.6987, "step": 8678 }, { "epoch": 0.7116576992057392, "grad_norm": 0.3632296919822693, "learning_rate": 2.107216099994603e-05, "loss": 2.721, "step": 8680 }, { "epoch": 0.7118216756341276, "grad_norm": 0.41475990414619446, "learning_rate": 2.1050050683798917e-05, "loss": 2.7017, "step": 8682 }, { "epoch": 0.711985652062516, "grad_norm": 0.3945857584476471, "learning_rate": 2.1027948880316372e-05, "loss": 2.707, "step": 8684 }, { "epoch": 0.7121496284909045, "grad_norm": 0.37521371245384216, "learning_rate": 2.1005855595997327e-05, "loss": 2.6778, "step": 8686 }, { "epoch": 0.7123136049192929, "grad_norm": 0.36565887928009033, "learning_rate": 2.0983770837338325e-05, "loss": 2.6422, "step": 8688 }, { "epoch": 0.7124775813476812, "grad_norm": 0.36480435729026794, "learning_rate": 2.0961694610833287e-05, "loss": 2.6999, "step": 8690 }, { "epoch": 0.7126415577760697, "grad_norm": 0.3911430835723877, "learning_rate": 2.09396269229737e-05, "loss": 2.6891, "step": 8692 }, { "epoch": 0.7128055342044581, "grad_norm": 0.3548656105995178, "learning_rate": 2.091756778024852e-05, "loss": 2.6749, "step": 8694 }, { "epoch": 0.7129695106328465, "grad_norm": 0.3688564598560333, "learning_rate": 2.0895517189144165e-05, "loss": 2.7075, "step": 8696 }, { "epoch": 0.7131334870612349, "grad_norm": 0.36516255140304565, "learning_rate": 2.087347515614456e-05, "loss": 2.6884, "step": 8698 }, { "epoch": 0.7132974634896234, "grad_norm": 0.37855157256126404, "learning_rate": 2.0851441687731133e-05, "loss": 2.6683, "step": 8700 }, { "epoch": 0.7134614399180118, "grad_norm": 0.3581778109073639, "learning_rate": 2.0829416790382772e-05, "loss": 2.6664, "step": 8702 }, { "epoch": 0.7136254163464002, "grad_norm": 0.35958313941955566, "learning_rate": 2.080740047057581e-05, "loss": 2.6719, "step": 8704 }, { "epoch": 0.7137893927747886, "grad_norm": 0.37104856967926025, "learning_rate": 2.0785392734784155e-05, "loss": 2.7047, "step": 8706 }, { "epoch": 0.7139533692031771, "grad_norm": 0.36019831895828247, "learning_rate": 2.0763393589479074e-05, "loss": 2.7314, "step": 8708 }, { "epoch": 0.7141173456315655, "grad_norm": 0.3699694871902466, "learning_rate": 2.074140304112939e-05, "loss": 2.6927, "step": 8710 }, { "epoch": 0.7142813220599539, "grad_norm": 0.3692975342273712, "learning_rate": 2.0719421096201368e-05, "loss": 2.6529, "step": 8712 }, { "epoch": 0.7144452984883423, "grad_norm": 0.35241538286209106, "learning_rate": 2.0697447761158773e-05, "loss": 2.6626, "step": 8714 }, { "epoch": 0.7146092749167308, "grad_norm": 0.34361711144447327, "learning_rate": 2.0675483042462764e-05, "loss": 2.676, "step": 8716 }, { "epoch": 0.7147732513451192, "grad_norm": 0.36636024713516235, "learning_rate": 2.0653526946572037e-05, "loss": 2.6651, "step": 8718 }, { "epoch": 0.7149372277735075, "grad_norm": 0.3608117401599884, "learning_rate": 2.0631579479942752e-05, "loss": 2.6777, "step": 8720 }, { "epoch": 0.7151012042018959, "grad_norm": 0.37384918332099915, "learning_rate": 2.060964064902845e-05, "loss": 2.7356, "step": 8722 }, { "epoch": 0.7152651806302844, "grad_norm": 0.3529508709907532, "learning_rate": 2.0587710460280275e-05, "loss": 2.7252, "step": 8724 }, { "epoch": 0.7154291570586728, "grad_norm": 0.3796766698360443, "learning_rate": 2.0565788920146683e-05, "loss": 2.6545, "step": 8726 }, { "epoch": 0.7155931334870612, "grad_norm": 0.3741827607154846, "learning_rate": 2.0543876035073672e-05, "loss": 2.6834, "step": 8728 }, { "epoch": 0.7157571099154496, "grad_norm": 0.38752180337905884, "learning_rate": 2.052197181150468e-05, "loss": 2.696, "step": 8730 }, { "epoch": 0.7159210863438381, "grad_norm": 0.360067754983902, "learning_rate": 2.0500076255880608e-05, "loss": 2.6315, "step": 8732 }, { "epoch": 0.7160850627722265, "grad_norm": 0.3775290250778198, "learning_rate": 2.0478189374639752e-05, "loss": 2.6661, "step": 8734 }, { "epoch": 0.7162490392006149, "grad_norm": 0.379972368478775, "learning_rate": 2.0456311174217912e-05, "loss": 2.6948, "step": 8736 }, { "epoch": 0.7164130156290033, "grad_norm": 0.3910143971443176, "learning_rate": 2.0434441661048338e-05, "loss": 2.7446, "step": 8738 }, { "epoch": 0.7165769920573918, "grad_norm": 0.3933928906917572, "learning_rate": 2.041258084156169e-05, "loss": 2.7033, "step": 8740 }, { "epoch": 0.7167409684857802, "grad_norm": 0.3594576418399811, "learning_rate": 2.0390728722186126e-05, "loss": 2.6679, "step": 8742 }, { "epoch": 0.7169049449141686, "grad_norm": 0.35524871945381165, "learning_rate": 2.0368885309347162e-05, "loss": 2.6864, "step": 8744 }, { "epoch": 0.717068921342557, "grad_norm": 0.3604062497615814, "learning_rate": 2.0347050609467822e-05, "loss": 2.7033, "step": 8746 }, { "epoch": 0.7172328977709455, "grad_norm": 0.37317147850990295, "learning_rate": 2.0325224628968552e-05, "loss": 2.6738, "step": 8748 }, { "epoch": 0.7173968741993338, "grad_norm": 0.3843664526939392, "learning_rate": 2.0303407374267253e-05, "loss": 2.6998, "step": 8750 }, { "epoch": 0.7175608506277222, "grad_norm": 0.3630846440792084, "learning_rate": 2.028159885177917e-05, "loss": 2.6581, "step": 8752 }, { "epoch": 0.7177248270561107, "grad_norm": 0.38013482093811035, "learning_rate": 2.025979906791713e-05, "loss": 2.6389, "step": 8754 }, { "epoch": 0.7178888034844991, "grad_norm": 0.3656391203403473, "learning_rate": 2.023800802909125e-05, "loss": 2.662, "step": 8756 }, { "epoch": 0.7180527799128875, "grad_norm": 0.377647340297699, "learning_rate": 2.0216225741709154e-05, "loss": 2.7053, "step": 8758 }, { "epoch": 0.7182167563412759, "grad_norm": 0.383730947971344, "learning_rate": 2.0194452212175885e-05, "loss": 2.6918, "step": 8760 }, { "epoch": 0.7183807327696644, "grad_norm": 0.391690731048584, "learning_rate": 2.0172687446893873e-05, "loss": 2.7823, "step": 8762 }, { "epoch": 0.7185447091980528, "grad_norm": 0.3921140432357788, "learning_rate": 2.0150931452263005e-05, "loss": 2.6591, "step": 8764 }, { "epoch": 0.7187086856264412, "grad_norm": 0.34550994634628296, "learning_rate": 2.012918423468058e-05, "loss": 2.6826, "step": 8766 }, { "epoch": 0.7188726620548296, "grad_norm": 0.38729730248451233, "learning_rate": 2.010744580054134e-05, "loss": 2.7353, "step": 8768 }, { "epoch": 0.7190366384832181, "grad_norm": 0.3607577383518219, "learning_rate": 2.0085716156237362e-05, "loss": 2.7071, "step": 8770 }, { "epoch": 0.7192006149116065, "grad_norm": 0.39334240555763245, "learning_rate": 2.0063995308158273e-05, "loss": 2.6816, "step": 8772 }, { "epoch": 0.7193645913399949, "grad_norm": 0.38497117161750793, "learning_rate": 2.0042283262690975e-05, "loss": 2.6765, "step": 8774 }, { "epoch": 0.7195285677683833, "grad_norm": 0.35287126898765564, "learning_rate": 2.002058002621987e-05, "loss": 2.68, "step": 8776 }, { "epoch": 0.7196925441967718, "grad_norm": 0.38193613290786743, "learning_rate": 1.9998885605126743e-05, "loss": 2.7159, "step": 8778 }, { "epoch": 0.7198565206251601, "grad_norm": 0.36058685183525085, "learning_rate": 1.9977200005790804e-05, "loss": 2.7078, "step": 8780 }, { "epoch": 0.7200204970535485, "grad_norm": 0.3733639419078827, "learning_rate": 1.9955523234588613e-05, "loss": 2.7121, "step": 8782 }, { "epoch": 0.7201844734819369, "grad_norm": 0.3472130298614502, "learning_rate": 1.9933855297894196e-05, "loss": 2.6841, "step": 8784 }, { "epoch": 0.7203484499103254, "grad_norm": 0.37057337164878845, "learning_rate": 1.9912196202078976e-05, "loss": 2.6948, "step": 8786 }, { "epoch": 0.7205124263387138, "grad_norm": 0.34663885831832886, "learning_rate": 1.9890545953511702e-05, "loss": 2.6712, "step": 8788 }, { "epoch": 0.7206764027671022, "grad_norm": 0.3575139045715332, "learning_rate": 1.9868904558558655e-05, "loss": 2.6965, "step": 8790 }, { "epoch": 0.7208403791954906, "grad_norm": 0.394976407289505, "learning_rate": 1.9847272023583378e-05, "loss": 2.7299, "step": 8792 }, { "epoch": 0.7210043556238791, "grad_norm": 0.39025330543518066, "learning_rate": 1.9825648354946897e-05, "loss": 2.6544, "step": 8794 }, { "epoch": 0.7211683320522675, "grad_norm": 0.3627324402332306, "learning_rate": 1.9804033559007585e-05, "loss": 2.6766, "step": 8796 }, { "epoch": 0.7213323084806559, "grad_norm": 0.3690572679042816, "learning_rate": 1.978242764212125e-05, "loss": 2.618, "step": 8798 }, { "epoch": 0.7214962849090443, "grad_norm": 0.38577893376350403, "learning_rate": 1.9760830610640994e-05, "loss": 2.6724, "step": 8800 }, { "epoch": 0.7216602613374328, "grad_norm": 0.3648007810115814, "learning_rate": 1.9739242470917468e-05, "loss": 2.6598, "step": 8802 }, { "epoch": 0.7218242377658212, "grad_norm": 0.3590453863143921, "learning_rate": 1.971766322929854e-05, "loss": 2.6617, "step": 8804 }, { "epoch": 0.7219882141942096, "grad_norm": 0.37548232078552246, "learning_rate": 1.9696092892129565e-05, "loss": 2.6536, "step": 8806 }, { "epoch": 0.7221521906225981, "grad_norm": 0.4404735565185547, "learning_rate": 1.9674531465753254e-05, "loss": 2.6849, "step": 8808 }, { "epoch": 0.7223161670509864, "grad_norm": 0.3744325041770935, "learning_rate": 1.9652978956509675e-05, "loss": 2.7112, "step": 8810 }, { "epoch": 0.7224801434793748, "grad_norm": 0.35116636753082275, "learning_rate": 1.96314353707363e-05, "loss": 2.6774, "step": 8812 }, { "epoch": 0.7226441199077632, "grad_norm": 0.40838176012039185, "learning_rate": 1.9609900714767977e-05, "loss": 2.7045, "step": 8814 }, { "epoch": 0.7228080963361517, "grad_norm": 0.3593634068965912, "learning_rate": 1.9588374994936927e-05, "loss": 2.6728, "step": 8816 }, { "epoch": 0.7229720727645401, "grad_norm": 0.3494911193847656, "learning_rate": 1.9566858217572694e-05, "loss": 2.6689, "step": 8818 }, { "epoch": 0.7231360491929285, "grad_norm": 0.3753405511379242, "learning_rate": 1.9545350389002304e-05, "loss": 2.6964, "step": 8820 }, { "epoch": 0.7233000256213169, "grad_norm": 0.36216264963150024, "learning_rate": 1.952385151555003e-05, "loss": 2.7152, "step": 8822 }, { "epoch": 0.7234640020497054, "grad_norm": 0.3647003471851349, "learning_rate": 1.9502361603537583e-05, "loss": 2.7221, "step": 8824 }, { "epoch": 0.7236279784780938, "grad_norm": 0.36026209592819214, "learning_rate": 1.9480880659284022e-05, "loss": 2.6614, "step": 8826 }, { "epoch": 0.7237919549064822, "grad_norm": 0.36493557691574097, "learning_rate": 1.9459408689105786e-05, "loss": 2.7036, "step": 8828 }, { "epoch": 0.7239559313348706, "grad_norm": 0.3542688190937042, "learning_rate": 1.9437945699316617e-05, "loss": 2.6828, "step": 8830 }, { "epoch": 0.7241199077632591, "grad_norm": 0.3724217116832733, "learning_rate": 1.9416491696227677e-05, "loss": 2.7026, "step": 8832 }, { "epoch": 0.7242838841916475, "grad_norm": 0.37069764733314514, "learning_rate": 1.9395046686147488e-05, "loss": 2.6942, "step": 8834 }, { "epoch": 0.7244478606200359, "grad_norm": 0.3715781271457672, "learning_rate": 1.9373610675381847e-05, "loss": 2.6688, "step": 8836 }, { "epoch": 0.7246118370484242, "grad_norm": 0.36626240611076355, "learning_rate": 1.9352183670234038e-05, "loss": 2.7179, "step": 8838 }, { "epoch": 0.7247758134768127, "grad_norm": 0.38260751962661743, "learning_rate": 1.9330765677004563e-05, "loss": 2.6749, "step": 8840 }, { "epoch": 0.7249397899052011, "grad_norm": 0.367906391620636, "learning_rate": 1.930935670199135e-05, "loss": 2.686, "step": 8842 }, { "epoch": 0.7251037663335895, "grad_norm": 0.3517436385154724, "learning_rate": 1.9287956751489662e-05, "loss": 2.6911, "step": 8844 }, { "epoch": 0.7252677427619779, "grad_norm": 0.363772451877594, "learning_rate": 1.9266565831792114e-05, "loss": 2.6554, "step": 8846 }, { "epoch": 0.7254317191903664, "grad_norm": 0.3705129325389862, "learning_rate": 1.9245183949188634e-05, "loss": 2.6706, "step": 8848 }, { "epoch": 0.7255956956187548, "grad_norm": 0.3884037137031555, "learning_rate": 1.922381110996652e-05, "loss": 2.6553, "step": 8850 }, { "epoch": 0.7257596720471432, "grad_norm": 0.3646256625652313, "learning_rate": 1.9202447320410412e-05, "loss": 2.7096, "step": 8852 }, { "epoch": 0.7259236484755316, "grad_norm": 0.3645271360874176, "learning_rate": 1.9181092586802274e-05, "loss": 2.6916, "step": 8854 }, { "epoch": 0.7260876249039201, "grad_norm": 0.36142271757125854, "learning_rate": 1.915974691542144e-05, "loss": 2.6765, "step": 8856 }, { "epoch": 0.7262516013323085, "grad_norm": 0.36564940214157104, "learning_rate": 1.913841031254452e-05, "loss": 2.6845, "step": 8858 }, { "epoch": 0.7264155777606969, "grad_norm": 0.3475258946418762, "learning_rate": 1.91170827844455e-05, "loss": 2.6721, "step": 8860 }, { "epoch": 0.7265795541890853, "grad_norm": 0.32706737518310547, "learning_rate": 1.9095764337395706e-05, "loss": 2.621, "step": 8862 }, { "epoch": 0.7267435306174738, "grad_norm": 0.36696481704711914, "learning_rate": 1.9074454977663774e-05, "loss": 2.7096, "step": 8864 }, { "epoch": 0.7269075070458622, "grad_norm": 0.3692162036895752, "learning_rate": 1.9053154711515638e-05, "loss": 2.649, "step": 8866 }, { "epoch": 0.7270714834742505, "grad_norm": 0.3444785177707672, "learning_rate": 1.9031863545214653e-05, "loss": 2.7086, "step": 8868 }, { "epoch": 0.727235459902639, "grad_norm": 0.35217925906181335, "learning_rate": 1.9010581485021378e-05, "loss": 2.7041, "step": 8870 }, { "epoch": 0.7273994363310274, "grad_norm": 0.3627830147743225, "learning_rate": 1.8989308537193777e-05, "loss": 2.6864, "step": 8872 }, { "epoch": 0.7275634127594158, "grad_norm": 0.3672342300415039, "learning_rate": 1.896804470798713e-05, "loss": 2.6632, "step": 8874 }, { "epoch": 0.7277273891878042, "grad_norm": 0.35386422276496887, "learning_rate": 1.894679000365398e-05, "loss": 2.6621, "step": 8876 }, { "epoch": 0.7278913656161927, "grad_norm": 0.36656153202056885, "learning_rate": 1.8925544430444235e-05, "loss": 2.6506, "step": 8878 }, { "epoch": 0.7280553420445811, "grad_norm": 0.37069380283355713, "learning_rate": 1.8904307994605108e-05, "loss": 2.7138, "step": 8880 }, { "epoch": 0.7282193184729695, "grad_norm": 0.364506334066391, "learning_rate": 1.8883080702381144e-05, "loss": 2.7046, "step": 8882 }, { "epoch": 0.7283832949013579, "grad_norm": 0.3649429678916931, "learning_rate": 1.8861862560014128e-05, "loss": 2.6496, "step": 8884 }, { "epoch": 0.7285472713297464, "grad_norm": 0.3707544207572937, "learning_rate": 1.884065357374326e-05, "loss": 2.7451, "step": 8886 }, { "epoch": 0.7287112477581348, "grad_norm": 0.37504974007606506, "learning_rate": 1.8819453749804956e-05, "loss": 2.6887, "step": 8888 }, { "epoch": 0.7288752241865232, "grad_norm": 0.38775527477264404, "learning_rate": 1.8798263094432987e-05, "loss": 2.6669, "step": 8890 }, { "epoch": 0.7290392006149116, "grad_norm": 0.3648729920387268, "learning_rate": 1.8777081613858416e-05, "loss": 2.6157, "step": 8892 }, { "epoch": 0.7292031770433001, "grad_norm": 0.34821397066116333, "learning_rate": 1.8755909314309616e-05, "loss": 2.6994, "step": 8894 }, { "epoch": 0.7293671534716885, "grad_norm": 0.3641124963760376, "learning_rate": 1.8734746202012233e-05, "loss": 2.7311, "step": 8896 }, { "epoch": 0.7295311299000768, "grad_norm": 0.3881944715976715, "learning_rate": 1.8713592283189235e-05, "loss": 2.7025, "step": 8898 }, { "epoch": 0.7296951063284652, "grad_norm": 0.37551623582839966, "learning_rate": 1.869244756406091e-05, "loss": 2.7076, "step": 8900 }, { "epoch": 0.7298590827568537, "grad_norm": 0.38129666447639465, "learning_rate": 1.8671312050844753e-05, "loss": 2.7015, "step": 8902 }, { "epoch": 0.7300230591852421, "grad_norm": 0.35345518589019775, "learning_rate": 1.8650185749755678e-05, "loss": 2.6657, "step": 8904 }, { "epoch": 0.7301870356136305, "grad_norm": 0.3613397777080536, "learning_rate": 1.8629068667005784e-05, "loss": 2.6862, "step": 8906 }, { "epoch": 0.7303510120420189, "grad_norm": 0.38645192980766296, "learning_rate": 1.8607960808804503e-05, "loss": 2.6648, "step": 8908 }, { "epoch": 0.7305149884704074, "grad_norm": 0.3691881597042084, "learning_rate": 1.8586862181358566e-05, "loss": 2.6725, "step": 8910 }, { "epoch": 0.7306789648987958, "grad_norm": 0.3389580249786377, "learning_rate": 1.8565772790871984e-05, "loss": 2.6795, "step": 8912 }, { "epoch": 0.7308429413271842, "grad_norm": 0.3590739667415619, "learning_rate": 1.8544692643545984e-05, "loss": 2.6866, "step": 8914 }, { "epoch": 0.7310069177555726, "grad_norm": 0.356611967086792, "learning_rate": 1.8523621745579218e-05, "loss": 2.7193, "step": 8916 }, { "epoch": 0.7311708941839611, "grad_norm": 0.3655679225921631, "learning_rate": 1.850256010316747e-05, "loss": 2.6451, "step": 8918 }, { "epoch": 0.7313348706123495, "grad_norm": 0.3675282895565033, "learning_rate": 1.8481507722503893e-05, "loss": 2.6803, "step": 8920 }, { "epoch": 0.7314988470407379, "grad_norm": 0.368619829416275, "learning_rate": 1.84604646097789e-05, "loss": 2.7248, "step": 8922 }, { "epoch": 0.7316628234691264, "grad_norm": 0.38850507140159607, "learning_rate": 1.8439430771180138e-05, "loss": 2.6808, "step": 8924 }, { "epoch": 0.7318267998975148, "grad_norm": 0.35582661628723145, "learning_rate": 1.8418406212892576e-05, "loss": 2.6323, "step": 8926 }, { "epoch": 0.7319907763259031, "grad_norm": 0.380453884601593, "learning_rate": 1.839739094109843e-05, "loss": 2.6512, "step": 8928 }, { "epoch": 0.7321547527542915, "grad_norm": 0.3536703586578369, "learning_rate": 1.837638496197721e-05, "loss": 2.7053, "step": 8930 }, { "epoch": 0.73231872918268, "grad_norm": 0.35601118206977844, "learning_rate": 1.8355388281705632e-05, "loss": 2.7351, "step": 8932 }, { "epoch": 0.7324827056110684, "grad_norm": 0.35562974214553833, "learning_rate": 1.8334400906457775e-05, "loss": 2.6014, "step": 8934 }, { "epoch": 0.7326466820394568, "grad_norm": 0.36037373542785645, "learning_rate": 1.831342284240489e-05, "loss": 2.6584, "step": 8936 }, { "epoch": 0.7328106584678452, "grad_norm": 0.3486712872982025, "learning_rate": 1.829245409571554e-05, "loss": 2.7057, "step": 8938 }, { "epoch": 0.7329746348962337, "grad_norm": 0.35479119420051575, "learning_rate": 1.8271494672555527e-05, "loss": 2.6537, "step": 8940 }, { "epoch": 0.7331386113246221, "grad_norm": 0.35784339904785156, "learning_rate": 1.8250544579087953e-05, "loss": 2.6107, "step": 8942 }, { "epoch": 0.7333025877530105, "grad_norm": 0.353708416223526, "learning_rate": 1.8229603821473095e-05, "loss": 2.6633, "step": 8944 }, { "epoch": 0.7334665641813989, "grad_norm": 0.3418165445327759, "learning_rate": 1.8208672405868553e-05, "loss": 2.6811, "step": 8946 }, { "epoch": 0.7336305406097874, "grad_norm": 0.3424532413482666, "learning_rate": 1.8187750338429187e-05, "loss": 2.6729, "step": 8948 }, { "epoch": 0.7337945170381758, "grad_norm": 0.3533565402030945, "learning_rate": 1.816683762530702e-05, "loss": 2.6634, "step": 8950 }, { "epoch": 0.7339584934665642, "grad_norm": 0.330537348985672, "learning_rate": 1.8145934272651467e-05, "loss": 2.6244, "step": 8952 }, { "epoch": 0.7341224698949526, "grad_norm": 0.34000977873802185, "learning_rate": 1.812504028660904e-05, "loss": 2.7216, "step": 8954 }, { "epoch": 0.7342864463233411, "grad_norm": 0.3350144624710083, "learning_rate": 1.8104155673323602e-05, "loss": 2.664, "step": 8956 }, { "epoch": 0.7344504227517294, "grad_norm": 0.35844945907592773, "learning_rate": 1.8083280438936213e-05, "loss": 2.6992, "step": 8958 }, { "epoch": 0.7346143991801178, "grad_norm": 0.3489479422569275, "learning_rate": 1.8062414589585208e-05, "loss": 2.7228, "step": 8960 }, { "epoch": 0.7347783756085062, "grad_norm": 0.35961857438087463, "learning_rate": 1.8041558131406088e-05, "loss": 2.7139, "step": 8962 }, { "epoch": 0.7349423520368947, "grad_norm": 0.36380940675735474, "learning_rate": 1.8020711070531714e-05, "loss": 2.5657, "step": 8964 }, { "epoch": 0.7351063284652831, "grad_norm": 0.34669262170791626, "learning_rate": 1.7999873413092066e-05, "loss": 2.7073, "step": 8966 }, { "epoch": 0.7352703048936715, "grad_norm": 0.36911582946777344, "learning_rate": 1.7979045165214425e-05, "loss": 2.7098, "step": 8968 }, { "epoch": 0.7354342813220599, "grad_norm": 0.3712131381034851, "learning_rate": 1.7958226333023297e-05, "loss": 2.6947, "step": 8970 }, { "epoch": 0.7355982577504484, "grad_norm": 0.36578479409217834, "learning_rate": 1.7937416922640395e-05, "loss": 2.7744, "step": 8972 }, { "epoch": 0.7357622341788368, "grad_norm": 0.3710160553455353, "learning_rate": 1.791661694018468e-05, "loss": 2.6436, "step": 8974 }, { "epoch": 0.7359262106072252, "grad_norm": 0.3627588748931885, "learning_rate": 1.7895826391772335e-05, "loss": 2.7135, "step": 8976 }, { "epoch": 0.7360901870356137, "grad_norm": 0.3674503266811371, "learning_rate": 1.7875045283516802e-05, "loss": 2.6796, "step": 8978 }, { "epoch": 0.7362541634640021, "grad_norm": 0.3505978584289551, "learning_rate": 1.7854273621528656e-05, "loss": 2.673, "step": 8980 }, { "epoch": 0.7364181398923905, "grad_norm": 0.36206257343292236, "learning_rate": 1.783351141191582e-05, "loss": 2.6763, "step": 8982 }, { "epoch": 0.7365821163207789, "grad_norm": 0.3386150598526001, "learning_rate": 1.7812758660783336e-05, "loss": 2.6658, "step": 8984 }, { "epoch": 0.7367460927491674, "grad_norm": 0.36186760663986206, "learning_rate": 1.779201537423351e-05, "loss": 2.6366, "step": 8986 }, { "epoch": 0.7369100691775557, "grad_norm": 0.3517228960990906, "learning_rate": 1.777128155836586e-05, "loss": 2.6988, "step": 8988 }, { "epoch": 0.7370740456059441, "grad_norm": 0.37019091844558716, "learning_rate": 1.7750557219277126e-05, "loss": 2.6978, "step": 8990 }, { "epoch": 0.7372380220343325, "grad_norm": 0.35576122999191284, "learning_rate": 1.772984236306122e-05, "loss": 2.6686, "step": 8992 }, { "epoch": 0.737401998462721, "grad_norm": 0.3716403841972351, "learning_rate": 1.7709136995809322e-05, "loss": 2.6938, "step": 8994 }, { "epoch": 0.7375659748911094, "grad_norm": 0.37883302569389343, "learning_rate": 1.7688441123609805e-05, "loss": 2.7095, "step": 8996 }, { "epoch": 0.7377299513194978, "grad_norm": 0.3864205479621887, "learning_rate": 1.7667754752548192e-05, "loss": 2.6649, "step": 8998 }, { "epoch": 0.7378939277478862, "grad_norm": 0.36532631516456604, "learning_rate": 1.764707788870733e-05, "loss": 2.6308, "step": 9000 } ], "logging_steps": 2, "max_steps": 12197, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.599488558432256e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }